src/gmp-6.1.0/mpn/generic/mu_div_q.c - stadia-controller/gcc-arm-none-eabi - Git at Google

 /* mpn_mu_div_q.

    Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

 Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.

 This file is part of the GNU MP Library.

 The GNU MP Library is free software; you can redistribute it and/or modify
 it under the terms of either:

   * the GNU Lesser General Public License as published by the Free
     Software Foundation; either version 3 of the License, or (at your
     option) any later version.

 or

   * the GNU General Public License as published by the Free Software
     Foundation; either version 2 of the License, or (at your option) any
     later version.

 or both in parallel, as here.

 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 You should have received copies of the GNU General Public License and the
 GNU Lesser General Public License along with the GNU MP Library.  If not,
 see https://www.gnu.org/licenses/.  */


 /*
    The idea of the algorithm used herein is to compute a smaller inverted value
    than used in the standard Barrett algorithm, and thus save time in the
    Newton iterations, and pay just a small price when using the inverted value
    for developing quotient bits.  This algorithm was presented at ICMS 2006.
 */

 /*
   Things to work on:

   1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
      probably close to optimal, except when mpn_mu_divappr_q fails.

   2. We used to fall back to mpn_mu_div_qr when we detect a possible
      mpn_mu_divappr_q rounding problem, now we multiply and compare.
      Unfortunately, since mpn_mu_divappr_q does not return the partial
      remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
      solve that.

   3. The allocations done here should be made from the scratch area, which
      then would need to be amended.
 */

 #include <stdlib.h>		/* for NULL */
 #include "gmp.h"
 #include "gmp-impl.h"


 mp_limb_t
 mpn_mu_div_q (mp_ptr qp,
 	      mp_srcptr np, mp_size_t nn,
 	      mp_srcptr dp, mp_size_t dn,
 	      mp_ptr scratch)
 {
   mp_ptr tp, rp;
   mp_size_t qn;
   mp_limb_t cy, qh;
   TMP_DECL;

   TMP_MARK;

   qn = nn - dn;

   tp = TMP_BALLOC_LIMBS (qn + 1);

   if (qn >= dn)			/* nn >= 2*dn + 1 */
     {
        /* |_______________________|   dividend
 			 |________|   divisor  */

       rp = TMP_BALLOC_LIMBS (nn + 1);
       MPN_COPY (rp + 1, np, nn);
       rp[0] = 0;

       qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
       if (qh != 0)
 	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);

       cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);

       if (UNLIKELY (cy != 0))
 	{
 	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
 	     canonically reduced, replace the returned value of B^(qn-dn)+eps
 	     by the largest possible value.  */
 	  mp_size_t i;
 	  for (i = 0; i < qn + 1; i++)
 	    tp[i] = GMP_NUMB_MAX;
 	}

       /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
 	 smaller than the max error, we cannot trust the quotient.  */
       if (tp[0] > 4)
 	{
 	  MPN_COPY (qp, tp + 1, qn);
 	}
       else
 	{
 	  mp_limb_t cy;
 	  mp_ptr pp;

 	  pp = rp;
 	  mpn_mul (pp, tp + 1, qn, dp, dn);

 	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;

 	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
 	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
 	  else /* Same as above */
 	    MPN_COPY (qp, tp + 1, qn);
 	}
     }
   else
     {
        /* |_______________________|   dividend
 		 |________________|   divisor  */

       /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
 	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
 	 the most significant dn-1 limbs will actually be read, but it is not
 	 pretty.  */

       qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
 			     dp + dn - (qn + 1), qn + 1, scratch);

       /* The max error of mpn_mu_divappr_q is +4, but we get an additional
          error from the divisor truncation.  */
       if (tp[0] > 6)
 	{
 	  MPN_COPY (qp, tp + 1, qn);
 	}
       else
 	{
 	  mp_limb_t cy;

 	  /* FIXME: a shorter product should be enough; we may use already
 	     allocated space... */
 	  rp = TMP_BALLOC_LIMBS (nn);
 	  mpn_mul (rp, dp, dn, tp + 1, qn);

 	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;

 	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
 	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
 	  else /* Same as above */
 	    MPN_COPY (qp, tp + 1, qn);
 	}
     }

   TMP_FREE;
   return qh;
 }

 mp_size_t
 mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
 {
   mp_size_t qn;

   qn = nn - dn;
   if (qn >= dn)
     {
       return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
     }
   else
     {
       return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
     }
 }
	/* mpn_mu_div_q.

	Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

	THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
	SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
	GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

	Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.

	This file is part of the GNU MP Library.

	The GNU MP Library is free software; you can redistribute it and/or modify
	it under the terms of either:

	* the GNU Lesser General Public License as published by the Free
	Software Foundation; either version 3 of the License, or (at your
	option) any later version.

	or

	* the GNU General Public License as published by the Free Software
	Foundation; either version 2 of the License, or (at your option) any
	later version.

	or both in parallel, as here.

	The GNU MP Library is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	for more details.

	You should have received copies of the GNU General Public License and the
	GNU Lesser General Public License along with the GNU MP Library. If not,
	see https://www.gnu.org/licenses/. */


	/*
	The idea of the algorithm used herein is to compute a smaller inverted value
	than used in the standard Barrett algorithm, and thus save time in the
	Newton iterations, and pay just a small price when using the inverted value
	for developing quotient bits. This algorithm was presented at ICMS 2006.
	*/

	/*
	Things to work on:

	1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is
	probably close to optimal, except when mpn_mu_divappr_q fails.

	2. We used to fall back to mpn_mu_div_qr when we detect a possible
	mpn_mu_divappr_q rounding problem, now we multiply and compare.
	Unfortunately, since mpn_mu_divappr_q does not return the partial
	remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could
	solve that.

	3. The allocations done here should be made from the scratch area, which
	then would need to be amended.
	*/

	#include <stdlib.h> /* for NULL */
	#include "gmp.h"
	#include "gmp-impl.h"


	mp_limb_t
	mpn_mu_div_q (mp_ptr qp,
	mp_srcptr np, mp_size_t nn,
	mp_srcptr dp, mp_size_t dn,
	mp_ptr scratch)
	{
	mp_ptr tp, rp;
	mp_size_t qn;
	mp_limb_t cy, qh;
	TMP_DECL;

	TMP_MARK;

	qn = nn - dn;

	tp = TMP_BALLOC_LIMBS (qn + 1);

	if (qn >= dn) /* nn >= 2dn + 1 /
	{
	/* \|_______________________\| dividend
	\|________\| divisor */

	rp = TMP_BALLOC_LIMBS (nn + 1);
	MPN_COPY (rp + 1, np, nn);
	rp[0] = 0;

	qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
	if (qh != 0)
	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);

	cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);

	if (UNLIKELY (cy != 0))
	{
	/* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
	canonically reduced, replace the returned value of B^(qn-dn)+eps
	by the largest possible value. */
	mp_size_t i;
	for (i = 0; i < qn + 1; i++)
	tp[i] = GMP_NUMB_MAX;
	}

	/* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is
	smaller than the max error, we cannot trust the quotient. */
	if (tp[0] > 4)
	{
	MPN_COPY (qp, tp + 1, qn);
	}
	else
	{
	mp_limb_t cy;
	mp_ptr pp;

	pp = rp;
	mpn_mul (pp, tp + 1, qn, dp, dn);

	cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;

	if (cy \|\| mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
	qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
	else /* Same as above */
	MPN_COPY (qp, tp + 1, qn);
	}
	}
	else
	{
	/* \|_______________________\| dividend
	\|________________\| divisor */

	/* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
	here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only
	the most significant dn-1 limbs will actually be read, but it is not
	pretty. */

	qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
	dp + dn - (qn + 1), qn + 1, scratch);

	/* The max error of mpn_mu_divappr_q is +4, but we get an additional
	error from the divisor truncation. */
	if (tp[0] > 6)
	{
	MPN_COPY (qp, tp + 1, qn);
	}
	else
	{
	mp_limb_t cy;

	/* FIXME: a shorter product should be enough; we may use already
	allocated space... */
	rp = TMP_BALLOC_LIMBS (nn);
	mpn_mul (rp, dp, dn, tp + 1, qn);

	cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;

	if (cy \|\| mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
	qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
	else /* Same as above */
	MPN_COPY (qp, tp + 1, qn);
	}
	}

	TMP_FREE;
	return qh;
	}

	mp_size_t
	mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
	{
	mp_size_t qn;

	qn = nn - dn;
	if (qn >= dn)
	{
	return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
	}
	else
	{
	return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
	}
	}