/* kdtree.c
 * 
 * Copyright (C) 2005 2006 Toon Calders, Bart Goethals, Szymon Jaroszewicz
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "kdtree.h"
#include "dataset.h"
#include "qselect.h"

/*
 * Reorder data[0..n) in place so that the slice forms an implicit kd-tree.
 *
 * The record at index n / 2 is the root of the slice, the records before it
 * are its left subtree and the records after it are its right subtree.  The
 * split coordinate at this level is dims[dim]; each level below uses the
 * next entry of dims, wrapping around after n_dim entries.
 *
 * data   array of record pointers, permuted in place
 * n      number of records in the slice
 * n_dim  number of entries in dims
 * dims   column indices used as split coordinates
 * dim    position in dims of the split coordinate for this level
 *
 * NOTE(review): quickselect() is defined elsewhere.  Going by the sanity
 * check that used to be commented out here, it is expected to leave
 * data[median] in its sorted position on the given column, with no larger
 * value before it and no smaller value after it -- confirm in qselect.
 */
static void
build_kdtree_rec(double ** data, size_t n, size_t n_dim, size_t * dims, size_t dim)
{
    size_t median;
    size_t next_dim;

    /* Slices of zero or one record are already valid trees. */
    if(n < 2)
        return;

    median = n / 2;
    quickselect(data, n, median, dims[dim]);

    /* Both children split on the same, next, coordinate. */
    next_dim = (dim + 1) % n_dim;
    build_kdtree_rec(data, median, n_dim, dims, next_dim);
    build_kdtree_rec(data + median + 1, n - median - 1, n_dim, dims, next_dim);
}


/*
 * Initialise `kdtree` as a view over the records of dataset `d`, organised
 * as an implicit kd-tree on the columns listed in dims[0..n_dim).
 *
 * The tree does not copy anything: it stores d->data and dims as given, and
 * the record pointers in d->data are permuted in place.
 *
 * NOTE(review): dataset_put_nans_at_end() is defined elsewhere.  Presumably
 * it moves records that have a NaN in one of the listed columns to the end
 * of d->data and returns how many records remain in front -- confirm in
 * dataset.  Only that many leading records become part of the tree.
 */
void KDtree_init(KDtree * kdtree, dataset * d, size_t n_dim, size_t * dims)
{
    const size_t usable = dataset_put_nans_at_end(d, dims, n_dim);

    kdtree->dims = dims;
    kdtree->n_dim = n_dim;
    kdtree->n = usable;
    kdtree->data = d->data;

    /* The root level splits on dims[0]. */
    build_kdtree_rec(d->data, usable, n_dim, dims, 0);
}

//static size_t KDtree_join_rec(double ** data1, size_t n1,
//			      double ** data2, size_t n2,
//			      size_t * dims, size_t n_dims, size_t dim,
//			      int * active_dims, size_t unsatisfied_dims)
//{
//    size_t i1 = n1 / 2;
//    size_t i2 = n2 / 2;
//    size_t ret;
//    double v1, v2;
//    double tmp;
//
//    if(n1 == 0 || n2 == 0)
//	return 0;
//
//    {
//	size_t j;
//	ret = 1;
//	for(j = 0; j < n_dims; j++)
//	    if(data1[i1][dims[j]] >= data2[i2][dims[j]])
//	    {
//		ret = 0;
//		break;
//	    }
//    }
//
//
//    v1 = data1[i1][dims[dim]];
//    v2 = data2[i2][dims[dim]];
//
//
//    if(v1 <= v2)
//    ret += KDtree_join_rec(data1 + i1, 1, data2, i2,
//			   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//    ret += KDtree_join_rec(data1 + i1, 1, data2 + i2 + 1, n2 - i2 - 1,
//    			   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//	
//
//    ret += KDtree_join_rec(data1, i1, data2 + i2, 1,
//			   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//    if(v2 >= v1)
//    ret += KDtree_join_rec(data1 + i1 + 1, n1 - i1 - 1, data2 + i2, 1,
//			   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//
//
//    ret += KDtree_join_rec(data1, i1, data2, i2,
//			   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//    if(n2 - i2 - 1 > 0 && i1 > 0)
//    {
//	if(v1 < v2 && active_dims[dim])
//	{
//	    if(unsatisfied_dims == 1) // this is the last unsatisfied dimension
//	    {
//		//printf("!!!!%d\n",i1 * (n2 - i2 - 1) );
//		ret += i1 * (n2 - i2 - 1);
//	    }
//	    else
//	    {
//		active_dims[dim] = 0;
//		ret += KDtree_join_rec(data1, i1, data2 + i2 + 1, n2 - i2 - 1,
//				       dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims - 1);
//		active_dims[dim] = 1;
//	    }
//	}
//	else
//	    ret += KDtree_join_rec(data1, i1, data2 + i2 + 1, n2 - i2 - 1,
//				   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//    }
//    if(n1 - i1 - 1 > 0)
//    {
//	if(i2 > 0 && v1 <= v2)
//	    ret += KDtree_join_rec(data1 + i1 + 1, n1 - i1 - 1, data2, i2,
//				   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//	if(n2 - i2 - 1 > 0)
//	    ret += KDtree_join_rec(data1 + i1 + 1, n1 - i1 - 1, data2 + i2 + 1, n2 - i2 - 1,
//				   dims, n_dims, (dim + 1) % n_dims, active_dims, unsatisfied_dims);
//    }
//
//    return ret;
//}
//
///* find number of pairs of records in both trees which are less on
//   every coordinate */
//size_t
//KDtree_join(KDtree * kdtree1, KDtree * kdtree2)
//{
//    size_t ret;
//    int * active_dims;
//    size_t i;
//
//    active_dims = malloc(sizeof(int) * kdtree1->n_dim);
//    for(i = 0; i < kdtree1->n_dim; i++)
//    {
//	active_dims[i] = 1;
//    }
//    ret = KDtree_join_rec(kdtree1->data, kdtree1->n,
//			  kdtree2->data, kdtree2->n,
//			  kdtree1->dims, kdtree1->n_dim, 0,
//			  active_dims, kdtree1->n_dim);
//    free(active_dims);
//    return ret;
//}


/*
 * Count the records of a kd-tree slice that are strictly smaller than
 * `bounds` on every listed coordinate, optionally marking them.
 *
 * data                record pointers of the slice; the root is data[n / 2],
 *                     data[0..n/2) is the left subtree and the rest the right
 * n                   number of records in the slice
 * n_dims              number of entries in dims and in bounds
 * bounds              bounds[j] is the exclusive upper bound for column dims[j]
 * dim                 position in dims of this level's split coordinate
 * dims                column indices
 * active_dims         active_dims[j] == 1 while bound j still has to be
 *                     checked for the records of the current slice; entries
 *                     are cleared temporarily and restored before returning
 * unsatisfied_bounds  number of bounds still to be checked for the slice
 * mark_lt             optional (may be NULL) array parallel to data:
 *                     bit 0x1 is set on a record that is below all bounds,
 *                     bit 0x2 is set on a root whose whole left subtree was
 *                     counted without being visited (fix_mark_lt_rec()
 *                     expands that flag afterwards)
 *
 * Returns the number of matching records.  A NaN in a record or in a bound
 * never matches, because every comparison with NaN is false.
 */
static size_t
kdtree_count_rec(double ** data, size_t n, size_t n_dims, const double * bounds,
                 size_t dim, size_t * dims, int * active_dims,
                 size_t unsatisfied_bounds, size_t * mark_lt)
{
    size_t i = n / 2;
    size_t next_dim;
    size_t ret;
    size_t j;
    double v;
    int below;

    if(n == 0)
        return 0;

    /* Does the root record itself satisfy every bound? */
    ret = 1;
    for(j = 0; j < n_dims; j++)
        if(isnan(data[i][dims[j]]) || isnan(bounds[j]) || data[i][dims[j]] >= bounds[j])
        {
            ret = 0;
            break;
        }
    if(mark_lt != NULL && ret == 1)
    {
        mark_lt[i] |= 1;
    }

    next_dim = (dim + 1) % n_dims;
    v = data[i][dims[dim]];
    below = v < bounds[dim];    /* false whenever either side is NaN */

    /* The right subtree holds values >= v on this coordinate, so it can
       only contain matches when v itself is below the bound. */
    if(below)
    {
        /* Keep an absent mark array absent: offsetting a NULL pointer is
           undefined and would turn it into a bogus non-NULL pointer. */
        ret += kdtree_count_rec(data + i + 1, n - i - 1, n_dims, bounds, next_dim, dims,
                                active_dims, unsatisfied_bounds,
                                mark_lt != NULL ? mark_lt + i + 1 : NULL);
    }

    if(active_dims[dim] == 1 && below)
    {
        /* Every record of the left subtree is <= v < bound on this
           coordinate, so this bound is settled for the whole subtree. */
        if(unsatisfied_bounds == 1)
        {
            /* It was the last open bound: all i records match. */
            if(mark_lt != NULL)
                mark_lt[i] |= 2;
            ret += i;
        }
        else
        {
            active_dims[dim] = 0;
            ret += kdtree_count_rec(data, i, n_dims, bounds, next_dim, dims,
                                    active_dims,
                                    unsatisfied_bounds - 1, mark_lt);
            active_dims[dim] = 1;
        }
    }
    else
        ret += kdtree_count_rec(data, i, n_dims, bounds, next_dim, dims,
                                active_dims,
                                unsatisfied_bounds, mark_lt);
    return ret;
}

/*
 * Count the records of `kdtree` that are strictly smaller than `bounds` on
 * every dimension of the tree.
 *
 * kdtree  tree to search
 * bounds  kdtree->n_dim exclusive upper bounds; bounds[j] applies to column
 *         kdtree->dims[j]
 *
 * Returns the number of matching records.  If the scratch array cannot be
 * allocated, a message is written to stderr and the program is aborted.
 *
 * NOTE(review): no mark array is requested (NULL is passed), so
 * kdtree_count_rec() must not write through mark_lt unguarded -- verify.
 */
size_t
KDtree_count(KDtree * kdtree, const double * bounds)
{
    size_t ret;
    int * active_dims;
    size_t i;

    active_dims = (int *) malloc(kdtree->n_dim * sizeof(int));
    /* A zero-sized request may legitimately return NULL. */
    if(active_dims == NULL && kdtree->n_dim != 0)
    {
        fprintf(stderr, "KDtree_count: out of memory\n");
        abort();
    }

    /* At the root, every bound still has to be checked. */
    for(i = 0; i < kdtree->n_dim; i++)
    {
        active_dims[i] = 1;
    }

    ret = kdtree_count_rec(kdtree->data, kdtree->n, kdtree->n_dim, bounds,
                           0, kdtree->dims, active_dims, kdtree->n_dim, NULL);
    free(active_dims);
    return ret;
}


/*
 * Recursive step of KDtree_join().
 *
 * For the root record g of the `greater` slice, kdtree_count_rec() counts
 * the records of the `less` slice that are strictly smaller than g on every
 * listed column; the same is then done for the left and right sub-slices of
 * `greater`, and all counts are summed.
 *
 * greater_data, greater_n  slice of the greater tree (root at greater_n / 2)
 * less_data, less_n        slice of the less tree (root at less_n / 2)
 * greater_dim, less_dim    positions in dims of the split coordinates of
 *                          the two slices
 * dims, n_dim              column indices shared by both trees
 * active_dims              scratch flags forwarded to kdtree_count_rec()
 * unsatisfied_dims         forwarded to kdtree_count_rec()
 * mark_gt                  must be non-NULL, parallel to greater_data; set
 *                          to 1 for a record that has at least one smaller
 *                          record in the less slice it was compared with
 * mark_lt                  parallel to less_data; forwarded to
 *                          kdtree_count_rec()
 *
 * Returns the accumulated count.  Not reentrant: the bound vector normally
 * lives in a static buffer.  Aborts with a message on stderr if a wider
 * bound vector is needed and cannot be allocated.
 */
size_t
KDtree_join_rec(double ** greater_data, size_t greater_n,
                double ** less_data, size_t less_n,
                size_t greater_dim, size_t less_dim,
                size_t * dims, size_t n_dim,
                int * active_dims, size_t unsatisfied_dims,
                size_t * mark_gt, size_t * mark_lt)
{
    static double static_bounds[5000];
    const size_t static_cap = sizeof static_bounds / sizeof static_bounds[0];
    double * bounds = static_bounds;
    size_t supp;
    size_t s;
    size_t i = greater_n / 2;
    size_t i2 = less_n / 2;
    size_t next_greater_dim;

    if(greater_n == 0 || less_n == 0)
        return 0;

    /* The static buffer covers the usual case; wider inputs get a heap
       buffer instead of overrunning it. */
    if(n_dim > static_cap)
    {
        bounds = (double *) malloc(n_dim * sizeof(double));
        if(bounds == NULL)
        {
            fprintf(stderr, "KDtree_join_rec: out of memory\n");
            abort();
        }
    }

    /* Use the root record of the greater slice as the bound vector.  It is
       fully consumed by the count below, before any recursive call can
       overwrite the shared buffer. */
    for(s = 0; s < n_dim; s++)
        bounds[s] = greater_data[i][dims[s]];
    supp = kdtree_count_rec(less_data, less_n, n_dim, bounds, less_dim, dims,
                            active_dims, unsatisfied_dims, mark_lt);
    if(bounds != static_bounds)
        free(bounds);

    if(supp > 0)
    {
        mark_gt[i] = 1;    /* still has something smaller */
    }

    next_greater_dim = (greater_dim + 1) % n_dim;

    if(greater_dim == less_dim &&
       !isnan(greater_data[i][dims[greater_dim]]) &&
       !isnan(less_data[i2][dims[less_dim]]) &&
       greater_data[i][dims[greater_dim]] <= less_data[i2][dims[less_dim]])
    {
        /* Both slices split on the same coordinate and the greater root
           does not exceed the less root there.  Nothing in the left
           greater sub-slice can then be strictly above the less root or
           its right subtree, so only the left less sub-slice is needed. */
        supp += KDtree_join_rec(greater_data, i, less_data, i2,
                                next_greater_dim, (less_dim + 1) % n_dim,
                                dims, n_dim, active_dims, unsatisfied_dims,
                                mark_gt, mark_lt);
    }
    else
    {
        supp += KDtree_join_rec(greater_data, i, less_data, less_n,
                                next_greater_dim, less_dim,
                                dims, n_dim, active_dims, unsatisfied_dims,
                                mark_gt, mark_lt);
    }

    /* The right greater sub-slice is always joined with the whole less
       slice. */
    supp += KDtree_join_rec(greater_data + i + 1, greater_n - i - 1, less_data, less_n,
                            next_greater_dim, less_dim,
                            dims, n_dim, active_dims, unsatisfied_dims,
                            mark_gt + i + 1, mark_lt);
    return supp;
}

/*
 * Expand the "whole left subtree matched" flags left in a mark array.
 *
 * mark_lt[0..n) is laid out like the tree slices used elsewhere in this
 * file: the root sits at n / 2, with the left subtree before it and the
 * right subtree after it.  Where a root carries bit 0x2, every entry of its
 * left subtree is set to 1 and the root keeps only bit 0x1.  Otherwise the
 * left subtree is processed recursively.  The right subtree is always
 * processed.
 */
static void
fix_mark_lt_rec(size_t * mark_lt, size_t n)
{
    /* Walk down the chain of right subtrees iteratively. */
    while(n > 0)
    {
        const size_t root = n / 2;

        if((mark_lt[root] & 0x2) != 0)
        {
            size_t k;

            for(k = 0; k < root; k++)
                mark_lt[k] = 1;
            mark_lt[root] &= 0x1;
        }
        else
        {
            fix_mark_lt_rec(mark_lt, root);
        }

        /* Continue with the right subtree. */
        mark_lt += root + 1;
        n -= root + 1;
    }
}

/*
 * Join two kd-trees: for every record of `greatertree`, count the records
 * of `lesstree` that are strictly smaller on every dimension, and sum the
 * counts.
 *
 * greatertree  tree supplying the upper records
 * lesstree     tree supplying the lower records; its dims / n_dim are the
 *              ones used for both trees
 * mark_gt      output, at least greatertree->n entries, parallel to
 *              greatertree->data: 1 if the record has some smaller record
 *              in lesstree, 0 otherwise
 * mark_lt      output, at least lesstree->n entries, parallel to
 *              lesstree->data; cleared here, filled in during the join and
 *              post-processed by fix_mark_lt_rec()
 *
 * Returns the total number of (greater, less) pairs found.  If the scratch
 * array cannot be allocated, a message is written to stderr and the program
 * is aborted.
 *
 * NOTE(review): both trees are assumed to be built over the same list of
 * dimensions -- confirm with the callers.
 */
size_t
KDtree_join(KDtree * greatertree, KDtree * lesstree, size_t * mark_gt, size_t * mark_lt)
{
    size_t supp;
    int * active_dims;
    size_t i;
    /* The recursion indexes active_dims with lesstree's dimension count,
       so the scratch array is sized from the same value. */
    const size_t n_dim = lesstree->n_dim;

    active_dims = (int *) malloc(n_dim * sizeof(int));
    /* A zero-sized request may legitimately return NULL. */
    if(active_dims == NULL && n_dim != 0)
    {
        fprintf(stderr, "KDtree_join: out of memory\n");
        abort();
    }
    for(i = 0; i < n_dim; i++)
    {
        active_dims[i] = 1;
    }

    /* Each mark array is cleared over the length of its own tree. */
    for(i = 0; i < greatertree->n; i++)
    {
        mark_gt[i] = 0;
    }
    for(i = 0; i < lesstree->n; i++)
    {
        mark_lt[i] = 0;
    }

    supp = KDtree_join_rec(greatertree->data, greatertree->n,
                           lesstree->data, lesstree->n,
                           0, 0,
                           lesstree->dims, n_dim,
                           active_dims, n_dim,
                           mark_gt, mark_lt);
    /* Expand the "whole left subtree" flags left in mark_lt. */
    fix_mark_lt_rec(mark_lt, lesstree->n);
    free(active_dims);
    return supp;
}
