//
// LiDIA - a library for computational number theory
//   Copyright (c) 1994, 1995 by the LiDIA Group
//
// File        : fft_arith.c
// Author      : Victor Shoup, Thomas Pfahler (TPf)
// Last change : TPf, Feb 29, 1996, initial version
//

#if defined(HAVE_MAC_DIRS) || defined(__MWERKS__)
#include <LiDIA:Fp_polynomial_fft.h>
#else
#include <LiDIA/Fp_polynomial_fft.h>
#endif



/***************************************************************
	This File contains the implementation of the functions
	-	fft_mul, fft_sqr
	-	fft_rem, fft_div, fft_div_rem (and copy_reverse)
	-	newton_inv (and class poly_mod_rep)
	- 	build_from_roots (and two auxiliary functions)
***************************************************************/



/***************************************************************
			fft_mul, fft_sqr
***************************************************************/

// Computes the product x = a * b.
// Both operands are converted, prime by prime, into a modular_fft_rep,
// multiplied there and converted back; get_result() then assembles x.
// If either operand is the zero polynomial, x becomes zero (with a's MOD).
void fft_mul(Fp_polynomial& x, const Fp_polynomial& a, const Fp_polynomial& b)
{
	debug_handler( "fftmul.c", "fft_mul( Fp_polynomial&, Fp_polynomial&, Fp_polynomial& )" );

	lidia_size_t da = a.degree();
	lidia_size_t db = b.degree();

	// degree() yields -1 for the zero polynomial
	if (da < 0 || db < 0)
	{
		x.MOD = a.MOD;
		x.assign_zero();
		return;
	}

	// the product has degree da+db, i.e. da+db+1 coefficients
	lidia_size_t prod_deg = da + db;
	lidia_size_t log_len = next_power_of_two(prod_deg+1);

	modular_fft_rep lhs(log_len, a.MOD);
	modular_fft_rep rhs(lhs);

	lidia_size_t pr = 0;
	while (pr < lhs.number_of_primes())
	{
		lhs.to_modular_fft_rep(a, pr);
		rhs.to_modular_fft_rep(b, pr);
		multiply(lhs, lhs, rhs, pr);
		lhs.from_modular_fft_rep(0, prod_deg, pr);
		pr++;
	}

	lhs.get_result(x, 0, prod_deg);
}


// Computes the square x = a * a.
// Works like fft_mul, but needs only one modular_fft_rep because both
// factors are identical.  The zero polynomial is squared to zero.
void fft_sqr(Fp_polynomial& x, const Fp_polynomial& a)
{
	debug_handler( "fftmul.c", "fft_sqr( Fp_polynomial&, Fp_polynomial& )" );

	lidia_size_t da = a.degree();

	// degree() yields -1 for the zero polynomial
	if (da < 0)
	{
		x.MOD = a.MOD;
		x.assign_zero();
		return;
	}

	// a^2 has degree 2*da, i.e. 2*da+1 coefficients
	lidia_size_t sq_deg = 2*da;
	lidia_size_t log_len = next_power_of_two(sq_deg+1);

	modular_fft_rep rep(log_len, a.MOD);

	lidia_size_t pr = 0;
	while (pr < rep.number_of_primes())
	{
		rep.to_modular_fft_rep(a, pr);
		multiply(rep, rep, rep, pr);
		rep.from_modular_fft_rep(0, sq_deg, pr);
		pr++;
	}

	rep.get_result(x, 0, sq_deg);
}


/***************************************************************
			fft_rem, -div, -div_rem
***************************************************************/

// Forward declaration; the definition follows fft_div_rem further down in
// this file.
void copy_reverse(Fp_polynomial& x, const Fp_polynomial& a, lidia_size_t lo, lidia_size_t hi);
// helper called by fft_rem, fft_div and fft_div_rem


// Computes the remainder r = a mod b.
//
// Outline (as far as visible here):
//   1. reverse b, invert it to precision deg(a)-deg(b)+1, reverse again;
//   2. multiply that by the top coefficients a[deg(b)..deg(a)] and keep
//      coefficients deg(a)-deg(b) .. 2*(deg(a)-deg(b))  (the same product
//      that fft_div stores into its quotient);
//   3. multiply the result of step 2 by b, keeping coefficients 0..deg(b)-1;
//   4. r = trunc(cyclic_reduce(a, 2^l), deg(b)) - (result of step 3).
//
// If deg(a) < deg(b), r is simply a copy of a.
// NOTE(review): b == 0 and deg(b) == 0 are not treated specially here -
// presumably the caller rules them out; confirm.
void fft_rem(Fp_polynomial& r, const Fp_polynomial& a, const Fp_polynomial& b)
{
	debug_handler( "fftrem.c", "fft_rem ( Fp_polynomial&, Fp_polynomial&, Fp_polynomial& )" );

	lidia_size_t db = b.degree();
	lidia_size_t da = a.degree();

	if (da < db)
	{
		r.assign( a );
		return;
	}

	lidia_size_t dq = da - db;	// highest index extracted in step 2
	lidia_size_t q_len = dq + 1;

	// step 1; the three temporaries are reused below on purpose
	Fp_polynomial rev_inv, scratch, acc;
	copy_reverse(acc, b, 0, db);
	invert(scratch, acc, q_len);
	copy_reverse(rev_inv, scratch, 0, q_len-1);

	// transform sizes (as logarithms) for step 2 and step 3
	lidia_size_t log_q = next_power_of_two(2*q_len-1);
	lidia_size_t log_r = next_power_of_two(db);
	lidia_size_t log_max = comparator<lidia_size_t>::max(log_q, log_r);
	lidia_size_t pr;

	// allocate once for the larger size, then shrink to what step 2 needs
	modular_fft_rep lhs(log_max, a.MOD);
	modular_fft_rep rhs(lhs);
	lhs.set_size(log_q);
	rhs.set_size(log_q);

	// step 2
	for (pr = 0; pr < lhs.number_of_primes(); pr++)
	{
		lhs.to_modular_fft_rep(rev_inv, pr);
		rhs.to_modular_fft_rep(a, db, da, pr);
		multiply(lhs, lhs, rhs, pr);
		lhs.from_modular_fft_rep(dq, 2*dq, pr);
	}
	lhs.get_result(acc, dq, 2*dq);

	// step 3
	lhs.set_size(log_r);
	rhs.set_size(log_r);
	for (pr = 0; pr < lhs.number_of_primes(); pr++)
	{
		lhs.to_modular_fft_rep(b, 0, db, pr);
		rhs.to_modular_fft_rep(acc, pr);
		multiply(lhs, lhs, rhs, pr);
		lhs.from_modular_fft_rep(0, db-1, pr);
	}
	lhs.get_result(acc, 0, db-1);

	// step 4
	lidia_size_t cyc_len = 1 << log_r;
	cyclic_reduce(scratch, a, cyc_len);
	trunc(r, scratch, db);
	subtract(r, r, acc);
}



// Computes the quotient q = a div b.
//
// The reversed polynomial b is inverted to precision deg(a)-deg(b)+1 and
// reversed again; this is multiplied by the top coefficients
// a[deg(b)..deg(a)], and coefficients deg(a)-deg(b) .. 2*(deg(a)-deg(b))
// of that product are stored into q.
//
// If deg(a) < deg(b), q is set to zero (with a's MOD).
// NOTE(review): b == 0 is not treated specially here - presumably the
// caller rules it out; confirm.
void fft_div(Fp_polynomial& q, const Fp_polynomial& a, const Fp_polynomial& b)
{
	debug_handler( "fftrem.c", "fft_div ( Fp_polynomial&, Fp_polynomial&, Fp_polynomial& )" );

	lidia_size_t db = b.degree();
	lidia_size_t da = a.degree();

	if (da < db)
	{
		q.MOD = a.MOD;
		q.assign_zero();
		return;
	}

	lidia_size_t dq = da - db;	// highest index extracted below
	lidia_size_t q_len = dq + 1;

	// reverse, invert to q_len terms, reverse back
	Fp_polynomial rev_inv, scratch, rev_b;
	copy_reverse(rev_b, b, 0, db);
	invert(scratch, rev_b, q_len);
	copy_reverse(rev_inv, scratch, 0, q_len-1);

	lidia_size_t log_len = next_power_of_two(2*q_len-1);

	modular_fft_rep lhs(log_len, a.MOD);
	modular_fft_rep rhs(lhs);

	lidia_size_t pr = 0;
	while (pr < lhs.number_of_primes())
	{
		lhs.to_modular_fft_rep(rev_inv, pr);
		rhs.to_modular_fft_rep(a, db, da, pr);

		multiply(lhs, lhs, rhs, pr);
		lhs.from_modular_fft_rep(dq, 2*dq, pr);
		pr++;
	}

	lhs.get_result(q, dq, 2*dq);
}



// Computes quotient and remainder at once: a = q*b + r.
//
// Outline (as far as visible here):
//   1. reverse b, invert it to precision deg(a)-deg(b)+1, reverse again;
//   2. multiply that by a[deg(b)..deg(a)]; coefficients
//      deg(a)-deg(b) .. 2*(deg(a)-deg(b)) of the product form the quotient;
//   3. multiply the quotient by b, keeping coefficients 0..deg(b)-1;
//   4. r = trunc(cyclic_reduce(a, 2^l), deg(b)) - (result of step 3).
// q is assigned last, so the inputs are still intact while r is computed.
//
// If deg(a) < deg(b): q = 0 (with a's MOD) and r = a.
// NOTE(review): b == 0 and deg(b) == 0 are not treated specially here -
// presumably the caller rules them out; confirm.
void fft_div_rem(Fp_polynomial& q, Fp_polynomial& r, const Fp_polynomial& a, const Fp_polynomial& b)
{
	debug_handler( "fftrem.c", "fft_div_rem ( Fp_polynomial&, Fp_polynomial&, Fp_polynomial&, Fp_polynomial& )" );

	lidia_size_t db = b.degree();
	lidia_size_t da = a.degree();

	if (da < db)
	{
		q.MOD = a.MOD;
		q.assign_zero();
		r.assign( a );
		return;
	}

	lidia_size_t dq = da - db;	// degree bound of the quotient

	// step 1; the three temporaries are reused below on purpose
	Fp_polynomial rev_inv, scratch, quot;
	copy_reverse(quot, b, 0, db);
	invert(scratch, quot, dq+1);
	copy_reverse(rev_inv, scratch, 0, dq);

	// transform sizes (as logarithms) for step 2 and step 3
	lidia_size_t log_q = next_power_of_two(2*dq+1);
	lidia_size_t log_r = next_power_of_two(db);
	lidia_size_t log_max = comparator<lidia_size_t>::max(log_q, log_r);
	lidia_size_t pr;

	// allocate once for the larger size, then shrink to what step 2 needs
	modular_fft_rep lhs(log_max, a.MOD);
	modular_fft_rep rhs(lhs);
	lhs.set_size(log_q);
	rhs.set_size(log_q);

	// step 2: quotient
	for (pr = 0; pr < lhs.number_of_primes(); pr++)
	{
		lhs.to_modular_fft_rep(rev_inv, pr);
		rhs.to_modular_fft_rep(a, db, da, pr);
		multiply(lhs, lhs, rhs, pr);
		lhs.from_modular_fft_rep(dq, 2*dq, pr);
	}
	lhs.get_result(quot, dq, 2*dq);

	// step 3: low part of quotient * b
	lhs.set_size(log_r);
	rhs.set_size(log_r);
	for (pr = 0; pr < lhs.number_of_primes(); pr++)
	{
		lhs.to_modular_fft_rep(b, pr);
		rhs.to_modular_fft_rep(quot, pr);
		multiply(lhs, lhs, rhs, pr);
		lhs.from_modular_fft_rep(0, db-1, pr);
	}
	lhs.get_result(rev_inv, 0, db-1);

	// step 4: remainder, then hand out the quotient
	lidia_size_t cyc_len = 1 << log_r;
	cyclic_reduce(scratch, a, cyc_len);
	trunc(r, scratch, db);
	subtract(r, r, rev_inv);
	q.assign( quot );
}



// x[0..hi-lo] = reverse(a[lo..hi]), i.e. x[i] = a[hi-i] for i = 0..hi-lo.
// Source indices outside 0..deg(a) are read as zero.
// x takes over a's MOD and is stripped of leading zeros afterwards.
// input may not alias output
void copy_reverse(Fp_polynomial& x, const Fp_polynomial& a, lidia_size_t lo, lidia_size_t hi)
{
	debug_handler( "fftrem.c", "copy_reverse( Fp_polynomial&, Fp_polynomial&, lidia_size_t, lidia_size_t) ");

	lidia_size_t out_len = hi-lo+1;		// number of coefficients written
	lidia_size_t in_len = a.degree()+1;	// number of coefficients of a

	x.MOD = a.MOD;
	x.set_degree(out_len-1);

	const bigint* src = a.coeff;
	bigint* dst = x.coeff;

	lidia_size_t to = 0;
	lidia_size_t from = hi;
	while (to < out_len)
	{
		if (from >= 0 && from < in_len)
			dst[to] = src[from];
		else
			dst[to].assign_zero();
		to++;
		from--;
	}

	x.remove_leading_zeros();
}



/***************************************************************
			newton_inv, class poly_mod_rep
***************************************************************/


class poly_mod_rep
{

// This data structure holds unconvoluted modular representations
// of polynomials
// used only in function newton_inv
//
// Everything is private: objects can only be created, used and destroyed
// by the friends listed below.

	lidia_size_t num_of_primes;	// number of rows of tbl
	lidia_size_t size, max_size;	// used / allocated length of each row
	sdigit **tbl;			// tbl[i] = coefficients reduced for prime no. i
					// (allocated by set_size, freed by the destructor)
	lidia_size_t k;	//primes for convolutions of max. degree 2^k
	fft_data F;			// set up by the constructor via F.init()

// Copying is forbidden: tbl owns heap memory which the destructor frees,
// so a member-wise copy would free it twice.  Both functions are declared
// but intentionally never defined.
	poly_mod_rep(const poly_mod_rep&);
	poly_mod_rep& operator=(const poly_mod_rep&);

#if 0
// disable
	poly_mod_rep() 
	{
		lidia_error_handler( "poly_mod_rep", "poly_mod_rep()::not implemented" );
	}
#endif

	// reduces coefficients lo..hi of a modulo every prime; l is stored as k
	poly_mod_rep(const Fp_polynomial& a, lidia_size_t lo, lidia_size_t hi, lidia_size_t l);
	
	~poly_mod_rep();

	

	// changes the row length; row contents are not preserved when growing
	void set_size(lidia_size_t NewN);


// friends
	friend void newton_inv(Fp_polynomial& x, const Fp_polynomial& a, lidia_size_t m);
	friend void modular_fft_rep::to_modular_fft_rep(const poly_mod_rep &a, lidia_size_t lo,
				lidia_size_t hi, lidia_size_t index);



#if 0
// disabled accessors; valid indices are 0..num_of_primes-1 and 0..size-1
	const sdigit* get_rep(lidia_size_t index) const
	{
    	debug_handler( "poly_mod_rep", "get_rep ( lidia_size_t )" );
	    if ( index < 0 || index >= num_of_primes )
    	    lidia_error_handler( "poly_mod_rep", "get_rep ( lidia_size_t )::out of range" );
    	return tbl[index];
	}

	const sdigit get_rep(lidia_size_t index, lidia_size_t m) const
	{
    	debug_handler( "poly_mod_rep", "get_rep ( lidia_size_t, lidia_size_t )" );
	    if ( index<0 || index>=num_of_primes || m<0 || m>=size )
    	    lidia_error_handler( "poly_mod_rep", "get_rep ( lidia_size_t, lidia_size_t )::out of range" );
	    return tbl[index][m];
	}
#endif

}; //end class poly_mod_rep



// Sets the row length to new_size.
// - new_size <= max_size: only the logical size changes, storage is kept.
// - otherwise every row is reallocated with new_size entries; the previous
//   contents are NOT copied over.  The table of row pointers itself is
//   created on the first allocation (max_size <= 0) and reused afterwards.
// A negative new_size is reported through lidia_error_handler.
void poly_mod_rep::set_size(lidia_size_t new_size)
{
	debug_handler( "poly_mod_rep", "set_size ( lidia_size_t )" );

	if (new_size < 0)
		lidia_error_handler("poly_mod_rep", "set_size ( lidia_size_t )::bad arguments" );

	// enough room already: nothing to allocate
	if (new_size <= max_size)
	{
		size = new_size;
		return;
	}

	lidia_size_t row;

	if (max_size > 0)
	{
		// drop the old, too short rows
		for (row = 0; row < num_of_primes; row++)
			delete[] tbl[row];
	}
	else
	{
		// very first allocation: create the table of row pointers
		tbl = new sdigit*[num_of_primes];
		if (!tbl)
			lidia_error_handler("poly_mod_rep", "set_size ( lidia_size_t )::out of space" );
	}

	for (row = 0; row < num_of_primes; row++)
	{
		tbl[row] = new sdigit[new_size];
		if (!tbl[row])
			lidia_error_handler("poly_mod_rep", "set_size ( lidia_size_t )::out of space" );
	}

	max_size = new_size;
	size = new_size;
}


// Builds the modular representation of coefficients lo..hi of a.
//   a   polynomial whose coefficients are reduced
//   lo  first coefficient index (must be >= 0)
//   hi  last coefficient index (must be >= 0; clamped to deg(a))
//   l   stored as k and passed to F.init() together with a.MOD (must be >= 0)
// Negative arguments are reported through lidia_error_handler before
// anything is initialised.
// If the range lo..hi is empty after clamping, no storage is allocated and
// no reduction is performed.
poly_mod_rep::poly_mod_rep(const Fp_polynomial& a, lidia_size_t lo, lidia_size_t hi, lidia_size_t l)
{
	debug_handler( "poly_mod_rep", "poly_mod_rep ( Fp_polynomial&, lidia_size_t, lidia_size_t, lidia_size_t )" );

	// validate first, so that F.init() never sees a negative l
	if (lo < 0 || hi < 0 || l < 0)
		lidia_error_handler( "poly_mod_rep", "poly_mod_rep ( Fp_polynomial&, lidia_size_t, lidia_size_t, lidia_size_t )::negative arguments" );

	k = l;
	F.init(l, a.MOD);
	crt help(F.crttable());
	num_of_primes = help.number_of_primes();

	// well-defined "nothing allocated" state; set_size() and the destructor
	// key off max_size == 0
	size = 0;
	max_size = 0;
	tbl = 0;

	hi = comparator<lidia_size_t>::min(hi, a.degree());
	lidia_size_t n = comparator<lidia_size_t>::max(hi-lo+1, 0);
	lidia_size_t i;

	this->set_size(n);

	// With n == 0, set_size() allocates nothing: tbl must not be touched,
	// and a[lo] need not exist.
	if (n > 0)
	{
		for (i = 0; i < num_of_primes; i++)
			help.reduce(tbl[i], &a[lo], n, i);
	}
}


// Releases every row of tbl and then the table of row pointers itself.
// max_size == 0 means that set_size() never allocated anything, so there
// is nothing to free in that case.
poly_mod_rep::~poly_mod_rep()
{
	debug_handler( "poly_mod_rep", "~poly_mod_rep ()" );

	if (max_size != 0)
	{
		lidia_size_t row = 0;
		while (row < num_of_primes)
		{
			delete[] tbl[row];
			row++;
		}
		delete[] tbl;
	}
}


void modular_fft_rep::to_modular_fft_rep(const poly_mod_rep &a, lidia_size_t lo, lidia_size_t hi, lidia_size_t index)
// converts coefficients lo..hi to a 2^k-point fft_rep.
// must have hi-lo+1 < 2^k
{
	debug_handler( "modular_fft_rep", "to_modular_fft_rep( poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t )" );
	sdigit *uptr = &stat_vec[0];
	
	if (k > a.k)
	  	lidia_error_handler( "modular_fft_rep", "to_modular_fft_rep( poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t )::primes for poly_mod_rep do not fit" );

	if (k < 0 || lo < 0)
		lidia_error_handler( "modular_fft_rep", "to_modular_fft_rep( poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t )::bad args" );
	if (hi > a.size-1)
		hi = a.size-1;
	lidia_size_t K = 1 << k;
	lidia_size_t j, m = comparator<lidia_size_t>::max(hi-lo + 1, 0);

	if (m > K)
		lidia_error_handler( "modular_fft_rep", "to_modular_fft_rep( poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t ):: hi-lo+1 is too large" );

	sdigit *ap = (m == 0 ? 0 : &a.tbl[index][0]);

	for (j = 0; j < m; j++)
		uptr[j] = ap[lo+j];
	for (j = m; j < K; j++)
		uptr[j] = 0;

	F.evaluate(s, uptr, k, index);
	//FFT(s, uptr, k, C.getprime(index), &RootTable[index][0]);
}






// Newton iteration for power series inversion; presumably x ends up as the
// inverse of a modulo X^m (TODO confirm against the callers).
//
// plain_inv() supplies the first 2^crov coefficients, where crov comes from
// Fp_polynomial::crossovers.log2_newton_crossover().  Each round then
// extends x from `have` to `want` = min(2*have, m) coefficients:
//   - multiply x by a[0..want-1], keep coefficients have..want-1;
//   - multiply that by x again, keep coefficients 0..want-have-1;
//   - store the negated result into x[have..want-1].
// NOTE(review): x is resized before a is read, so x must not alias a.
void newton_inv(Fp_polynomial& x, const Fp_polynomial& a, lidia_size_t m)
{
	debug_handler( "fftrem.c", "newton_inv( Fp_polynomial&, Fp_polynomial&, lidia_size_t )" );
	x.set_degree(m-1);
	x.MOD = a.MOD;
	const bigint & p = a.modulus();

	lidia_size_t pr, log_have, have;
	lidia_size_t crov =
	    Fp_polynomial::crossovers.log2_newton_crossover(p);

	// starting value
	plain_inv(x, a, (1<<crov));
	log_have = next_power_of_two(m);

	// sized for the final round; shrunk via set_size() inside the loop
	fft_rep x_rep(log_have, a.MOD);
	modular_fft_rep work(x_rep);
	Fp_polynomial corr;
	corr.set_max_degree(m/2 - 1);

	// a is reduced modulo the primes only once, up front
	lidia_size_t a_len = comparator<lidia_size_t>::min(m, a.c_length);

	poly_mod_rep a_rep(a, 0, a_len-1, log_have);

	log_have = crov;
	have = 1 << log_have;

	while (have < m)
	{
		lidia_size_t want = comparator<lidia_size_t>::min(2*have, m);

		x_rep.set_size(log_have+1);
		work.set_size(log_have+1);

		x_rep.to_fft_rep(x);

		// coefficients have..want-1 of x*a
		for (pr = 0; pr < work.number_of_primes(); pr++)
		{
			work.to_modular_fft_rep(a_rep, 0, want-1, pr);
			multiply(work, x_rep, work, pr);
			work.from_modular_fft_rep(have, want-1, pr);
		}
		work.get_result(corr, have, want-1);

		// coefficients 0..want-have-1 of x*corr
		work.set_size(log_have+1);
		for (pr = 0; pr < work.number_of_primes(); pr++)
		{
			work.to_modular_fft_rep(corr, pr);
			multiply(work, x_rep, work, pr);
			work.from_modular_fft_rep(0, want-have-1, pr);
		}
		work.get_result(corr, 0, want-have-1);

		x.set_degree(want-1);

		// x[have..want-1] = -corr, zero where corr has no coefficient
		lidia_size_t corr_len = corr.c_length;
		lidia_size_t dst, src;
		for (dst = have; dst < want; dst++)
		{
			src = dst - have;
			if (src < corr_len)
				NegateMod(x.coeff[dst], corr.coeff[src], p);
			else
				(x.coeff[dst]).assign_zero();
		}
		x.remove_leading_zeros();

		log_have++;
		have = want;
	}
}




/***************************************************************
				build_from_roots
***************************************************************/


// Forward declarations; both functions are defined after
// Fp_polynomial::build_from_roots in this file.
// iter_build works in place on a[0..n-1]; mul_build writes its result to x.
void iter_build(bigint* a, lidia_size_t n, const bigint& p);
void mul_build(bigint* x, const bigint* a, const bigint* b, lidia_size_t n, const bigint& p);
//helpers called by build_from_roots


// computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
//
// The modulus of *this must have been set; otherwise lidia_error_handler
// is called.  An empty vector yields the constant polynomial 1.
//
// For n up to a crossover (derived from crossovers.fftmul_crossover) the
// product is built directly by iter_build.  Otherwise:
//   - the roots are padded with zeros up to m = 2^k entries, which only
//     multiplies the result by X^(m-n);
//   - blocks of `crossover` roots are multiplied with mul_build;
//   - neighbouring blocks are merged with FFT products until one block of
//     width m remains;
//   - coefficients m-n .. m are copied into *this.
// Throughout, a monic polynomial of degree w is stored as its w low
// coefficients only; the leading 1 is implicit.
void Fp_polynomial::build_from_roots(const base_vector<bigint>& a)
{
	debug_handler( "Fp_polynomial", "build_from_roots( base_vector<bigint>& )" );
	
	const bigint& p = modulus();
	if (p.is_zero())
	  	lidia_error_handler( "Fp_polynomial", "build_from_roots( base_vector<bigint>& )::modulus was not set" );
		
	lidia_size_t n = a.size();
	if (n == 0)
	{
		this->assign_one();
		return;
	}

	lidia_size_t crov = Fp_polynomial::crossovers.fftmul_crossover(p);
	lidia_size_t k0 = next_power_of_two(crov);
	lidia_size_t crossover = 1 << k0;

	// few roots: build the product in place, then add the leading term
	if (n <= crossover)
	{
		set_max_degree(n);
		assign(a, p);
		iter_build(coeff, n, p);
		set_degree(n);
		set_coefficient(n);
		return;
	}

	lidia_size_t k = next_power_of_two(n);
	lidia_size_t m = 1 << k;
	lidia_size_t i, j, index, l, width;

	Fp_polynomial b;

	b.assign(a, p);
	b.set_degree(m);

	// pad with roots equal to zero
	for (i = n; i < m; i++)
		b[i].assign_zero();

	// leading coefficient of the final product
	b[m].assign_one();

	bigint t1, one(1);

	// scratch space for one block; g and h are swapped after every level
	bigint* g = new bigint[crossover];
	bigint* h = new bigint[crossover];
	bigint *tmp;

	if (!h || !g)
	    lidia_error_handler( "Fp_polynomial", "build_from_roots( base_vector<bigint>& )::out of space" );

	for (i = 0; i < m; i+= crossover)
	{
		// linear factors X - root, i.e. constant terms -root
		for (j = 0; j < crossover; j++)
			::NegateMod(g[j], b[i+j], p);

		// (X+u)(X+v) = X^2 + (u+v)X + uv, done in place for each pair
		if (k0 > 0)
		{
			for (j = 0; j < crossover; j+=2)
			{
				MulMod(t1, g[j], g[j+1], p);
				AddMod(g[j+1], g[j], g[j+1], p);
				g[j].assign( t1 );
			}
		}

		// merge neighbours of width 2, 4, ... within the block
		for (l = 1; l < k0; l++)
		{
			width = 1 << l;
			for (j = 0; j < crossover; j += 2*width)
				mul_build(&h[j], &g[j], &g[j+width], width, p);
			tmp = g; g = h; h = tmp;
		}

		for (j = 0; j < crossover; j++)
			b[i+j].assign( g[j] );
	}

	// the scratch arrays are not needed any more (they used to be leaked)
	delete[] g;
	delete[] h;

	modular_fft_rep R1(k,b.MOD);
	modular_fft_rep R2(R1);

	for (l = k0; l < k; l++)
	{
		width = 1 << l;
		for (i = 0; i < m; i += 2*width)
		{
			R1.set_size(l+1);
			R2.set_size(l+1);
			for (index = 0; index < R1.number_of_primes(); index++)
			{
				// Temporarily place the implicit leading 1 right behind each
				// factor by swapping it with `one`, and swap it back once
				// the factor has been transformed.
				swap(one,b.coeff[i+width]);
				//t1 = b[i+width]; (b[i+width]).assign_one();

				R1.to_modular_fft_rep(b, i, i+width, index);

				swap(one,b.coeff[i+width]);
				swap(b.coeff[i+2*width],one);
				//b[i+width] = t1; t1 = b[i+2*width]; b[i+2*width].assign_one();

				R2.to_modular_fft_rep(b, i+width, i+2*width, index);

				swap(b.coeff[i+2*width],one);
				//b[i+2*width] = t1;

				multiply(R1, R1, R2, index);
				R1.from_modular_fft_rep(0, 2*width-1, index);
			}
			R1.get_result_ptr(&b.coeff[i], 0, 2*width-1);
			// presumably the leading 1 of the product wraps around onto
			// coefficient 0 in the 2*width-point transform - remove it
			dec(b.coeff[i]);
			//subtract(b[i], b[i], one);
		}
	}

	// drop the factor X^(m-n) introduced by the padding
	set_degree(n);
	lidia_size_t delta = m-n;
	for (i = 0; i <= n; i++)
		coeff[i].assign( b[i+delta] );

   // no need to normalize
}



// In-place product of linear factors.
// On entry a[0..n-1] holds n roots; on exit it holds the n low coefficients
// of (X-a[0]) ... (X-a[n-1]) modulo p.  The leading coefficient 1 is not
// stored.  Nothing happens for n <= 0.
void iter_build(bigint* a, lidia_size_t n, const bigint& p)
{
	debug_handler( "Fp_polynomial", "iter_build( bigint*, lidia_size_t, const bigint& )" );
	lidia_size_t pos, deg;
	bigint c, prod;

	if (n <= 0) return;

	// first factor: X + (-a[0])
	NegateMod(a[0], a[0], p);

	// multiply the current polynomial of degree `deg` by (X + c), c = -a[deg]
	for (deg = 1; deg < n; deg++)
	{
		NegateMod(c, a[deg], p);

		// new coefficient of X^deg: c + old a[deg-1]
		AddMod(a[deg], c, a[deg-1], p);

		// inner coefficients, from the top down so that a[pos-1] is still old
		pos = deg-1;
		while (pos > 0)
		{
			MulMod(prod, a[pos], c, p);
			AddMod(a[pos], prod, a[pos-1], p);
			pos--;
		}

		// constant term
		MulMod(a[0], a[0], c, p);
	}
}


// Product of two monic polynomials of degree n whose leading 1 is not
// stored: with A = X^n + a[n-1]X^(n-1) + ... + a[0] and B likewise,
// x[0..2n-1] receives the 2n low coefficients of A*B modulo p.
//   coefficient i = sum of a[j]*b[i-j]  (+ a[i-n] + b[i-n] for i >= n,
//   which is the contribution of the two implicit leading terms).
// Sums are accumulated unreduced and reduced once per coefficient.
void mul_build(bigint* x, const bigint* a, const bigint* b, lidia_size_t n, const bigint& p)
{
	debug_handler( "Fp_polynomial", "mul_build( bigint*, const bigint*, const bigint*, lidia_size_t, const bigint& )" );
	bigint term, sum; //static
	lidia_size_t out, j, first, last;

	lidia_size_t top = 2*n-1;

	for (out = 0; out <= top; out++)
	{
		// range of j with 0 <= j <= n-1 and 0 <= out-j <= n-1
		first = comparator<lidia_size_t>::max(0, out-(n-1));
		last = comparator<lidia_size_t>::min(n-1, out);

		sum.assign_zero();
		j = first;
		while (j <= last)
		{
			multiply(term, a[j], b[out-j]);
			add(sum, sum, term);
			j++;
		}

		// implicit leading coefficients: X^n * b and a * X^n
		if (out >= n)
		{
			add(sum, sum, a[out-n]);
			add(sum, sum, b[out-n]);
		}

		Remainder(x[out], sum, p);
	}
}

