Context Navigation

Dragon4.cpp @ 1038

Last change on this file since 1038 was 834, checked in by Maciej Komosinski, 6 years ago
Renamed Math.h to MathDragon4.h because the <math.h> name clash is unfortunate (and caused problems with C++Builder and filename case-insensitive Windows)
File size: 40.9 KB

Rev	Line
[823]	1	/******************************************************************************
	2	Copyright (c) 2014 Ryan Juckett
	3	http://www.ryanjuckett.com/
	4
	5	This software is provided 'as-is', without any express or implied
	6	warranty. In no event will the authors be held liable for any damages
	7	arising from the use of this software.
	8
	9	Permission is granted to anyone to use this software for any purpose,
	10	including commercial applications, and to alter it and redistribute it
	11	freely, subject to the following restrictions:
	12
	13	1. The origin of this software must not be misrepresented; you must not
	14	claim that you wrote the original software. If you use this software
	15	in a product, an acknowledgment in the product documentation would be
	16	appreciated but is not required.
	17
	18	2. Altered source versions must be plainly marked as such, and must not be
	19	misrepresented as being the original software.
	20
	21	3. This notice may not be removed or altered from any source
	22	distribution.
	23	******************************************************************************/
	24
	25	#include "Dragon4.h"
[834]	26	#include "MathDragon4.h"
[823]	27	#include <math.h>
	28
	29	//******************************************************************************
	30	// Maximum number of 32 bit blocks needed in high precision arithmetic
	31	// to print out 64 bit IEEE floating point values.
	32	//******************************************************************************
	33	const tU32 c_BigInt_MaxBlocks = 35;
	34
	35	//******************************************************************************
	36	// This structure stores a high precision unsigned integer. It uses a buffer
	37	// of 32 bit integer blocks along with a length. The lowest bits of the integer
	38	// are stored at the start of the buffer and the length is set to the minimum
	39	// value that contains the integer. Thus, there are never any zero blocks at the
	40	// end of the buffer.
	41	//******************************************************************************
	42	struct tBigInt
	43	{
	44	// Copy integer
	45	tBigInt & operator=(const tBigInt &rhs)
	46	{
	47	tU32 length = rhs.m_length;
	48	tU32 * pLhsCur = m_blocks;
	49	for (const tU32 pRhsCur = rhs.m_blocks, pRhsEnd = pRhsCur + length;
	50	pRhsCur != pRhsEnd;
	51	++pLhsCur, ++pRhsCur)
	52	{
	53	pLhsCur = pRhsCur;
	54	}
	55	m_length = length;
	56	return *this;
	57	}
	58
	59	// Data accessors
	60	tU32 GetLength() const { return m_length; }
	61	tU32 GetBlock(tU32 idx) const { return m_blocks[idx]; }
	62
	63	// Zero helper functions
	64	void SetZero() { m_length = 0; }
	65	tB IsZero() const { return m_length == 0; }
	66
	67	// Basic type accessors
	68	void SetU64(tU64 val)
	69	{
	70	if (val > 0xFFFFFFFF)
	71	{
	72	m_blocks[0] = val & 0xFFFFFFFF;
	73	m_blocks[1] = (val >> 32) & 0xFFFFFFFF;
	74	m_length = 2;
	75	}
	76	else if (val != 0)
	77	{
	78	m_blocks[0] = val & 0xFFFFFFFF;
	79	m_length = 1;
	80	}
	81	else
	82	{
	83	m_length = 0;
	84	}
	85	}
	86
	87	void SetU32(tU32 val)
	88	{
	89	if (val != 0)
	90	{
	91	m_blocks[0] = val;
	92	m_length = (val != 0);
	93	}
	94	else
	95	{
	96	m_length = 0;
	97	}
	98	}
	99
	100	tU32 GetU32() const { return (m_length == 0) ? 0 : m_blocks[0]; }
	101
	102	// Member data
	103	tU32 m_length;
	104	tU32 m_blocks[c_BigInt_MaxBlocks];
	105	};
	106
	107	//******************************************************************************
	108	// Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs)
	109	//******************************************************************************
	110	static tS32 BigInt_Compare(const tBigInt & lhs, const tBigInt & rhs)
	111	{
	112	// A bigger length implies a bigger number.
	113	tS32 lengthDiff = lhs.m_length - rhs.m_length;
	114	if (lengthDiff != 0)
	115	return lengthDiff;
	116
	117	// Compare blocks one by one from high to low.
	118	for (tS32 i = lhs.m_length - 1; i >= 0; --i)
	119	{
	120	if (lhs.m_blocks[i] == rhs.m_blocks[i])
	121	continue;
	122	else if (lhs.m_blocks[i] > rhs.m_blocks[i])
	123	return 1;
	124	else
	125	return -1;
	126	}
	127
	128	// no blocks differed
	129	return 0;
	130	}
	131
	132	//******************************************************************************
	133	// result = lhs + rhs
	134	//******************************************************************************
	135	static void BigInt_Add(tBigInt * pResult, const tBigInt & lhs, const tBigInt & rhs)
	136	{
	137	// determine which operand has the smaller length
	138	const tBigInt * pLarge;
	139	const tBigInt * pSmall;
	140	if (lhs.m_length < rhs.m_length)
	141	{
	142	pSmall = &lhs;
	143	pLarge = &rhs;
	144	}
	145	else
	146	{
	147	pSmall = &rhs;
	148	pLarge = &lhs;
	149	}
	150
	151	const tU32 largeLen = pLarge->m_length;
	152	const tU32 smallLen = pSmall->m_length;
	153
	154	// The output will be at least as long as the largest input
	155	pResult->m_length = largeLen;
	156
	157	// Add each block and add carry the overflow to the next block
	158	tU64 carry = 0;
	159	const tU32 * pLargeCur = pLarge->m_blocks;
	160	const tU32 * pLargeEnd = pLargeCur + largeLen;
	161	const tU32 * pSmallCur = pSmall->m_blocks;
	162	const tU32 * pSmallEnd = pSmallCur + smallLen;
	163	tU32 * pResultCur = pResult->m_blocks;
	164	while (pSmallCur != pSmallEnd)
	165	{
	166	tU64 sum = carry + (tU64)(pLargeCur) + (tU64)(pSmallCur);
	167	carry = sum >> 32;
	168	(*pResultCur) = sum & 0xFFFFFFFF;
	169	++pLargeCur;
	170	++pSmallCur;
	171	++pResultCur;
	172	}
	173
	174	// Add the carry to any blocks that only exist in the large operand
	175	while (pLargeCur != pLargeEnd)
	176	{
	177	tU64 sum = carry + (tU64)(*pLargeCur);
	178	carry = sum >> 32;
	179	(*pResultCur) = sum & 0xFFFFFFFF;
	180	++pLargeCur;
	181	++pResultCur;
	182	}
	183
	184	// If there's still a carry, append a new block
	185	if (carry != 0)
	186	{
	187	RJ_ASSERT(carry == 1);
	188	RJ_ASSERT((tU32)(pResultCur - pResult->m_blocks) == largeLen && (largeLen < c_BigInt_MaxBlocks));
	189	*pResultCur = 1;
	190	pResult->m_length = largeLen + 1;
	191	}
	192	else
	193	{
	194	pResult->m_length = largeLen;
	195	}
	196	}
	197
	198	//******************************************************************************
	199	// result = lhs * rhs
	200	//******************************************************************************
	201	static void BigInt_Multiply(tBigInt * pResult, const tBigInt &lhs, const tBigInt &rhs)
	202	{
	203	RJ_ASSERT( pResult != &lhs && pResult != &rhs );
	204
	205	// determine which operand has the smaller length
	206	const tBigInt * pLarge;
	207	const tBigInt * pSmall;
	208	if (lhs.m_length < rhs.m_length)
	209	{
	210	pSmall = &lhs;
	211	pLarge = &rhs;
	212	}
	213	else
	214	{
	215	pSmall = &rhs;
	216	pLarge = &lhs;
	217	}
	218
	219	// set the maximum possible result length
	220	tU32 maxResultLen = pLarge->m_length + pSmall->m_length;
	221	RJ_ASSERT( maxResultLen <= c_BigInt_MaxBlocks );
	222
	223	// clear the result data
	224	for(tU32 * pCur = pResult->m_blocks, *pEnd = pCur + maxResultLen; pCur != pEnd; ++pCur)
	225	*pCur = 0;
	226
	227	// perform standard long multiplication
	228	const tU32 *pLargeBeg = pLarge->m_blocks;
	229	const tU32 *pLargeEnd = pLargeBeg + pLarge->m_length;
	230
	231	// for each small block
	232	tU32 *pResultStart = pResult->m_blocks;
	233	for(const tU32 pSmallCur = pSmall->m_blocks, pSmallEnd = pSmallCur + pSmall->m_length;
	234	pSmallCur != pSmallEnd;
	235	++pSmallCur, ++pResultStart)
	236	{
	237	// if non-zero, multiply against all the large blocks and add into the result
	238	const tU32 multiplier = *pSmallCur;
	239	if (multiplier != 0)
	240	{
	241	const tU32 *pLargeCur = pLargeBeg;
	242	tU32 *pResultCur = pResultStart;
	243	tU64 carry = 0;
	244	do
	245	{
	246	tU64 product = (pResultCur) + (pLargeCur)*(tU64)multiplier + carry;
	247	carry = product >> 32;
	248	*pResultCur = product & 0xFFFFFFFF;
	249	++pLargeCur;
	250	++pResultCur;
	251	} while(pLargeCur != pLargeEnd);
	252
	253	RJ_ASSERT(pResultCur < pResult->m_blocks + maxResultLen);
	254	*pResultCur = (tU32)(carry & 0xFFFFFFFF);
	255	}
	256	}
	257
	258	// check if the terminating block has no set bits
	259	if (maxResultLen > 0 && pResult->m_blocks[maxResultLen - 1] == 0)
	260	pResult->m_length = maxResultLen-1;
	261	else
	262	pResult->m_length = maxResultLen;
	263	}
	264
	265	//******************************************************************************
	266	// result = lhs * rhs
	267	//******************************************************************************
	268	static void BigInt_Multiply(tBigInt * pResult, const tBigInt & lhs, tU32 rhs)
	269	{
	270	// perform long multiplication
	271	tU32 carry = 0;
	272	tU32 *pResultCur = pResult->m_blocks;
	273	const tU32 *pLhsCur = lhs.m_blocks;
	274	const tU32 *pLhsEnd = lhs.m_blocks + lhs.m_length;
	275	for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++pResultCur )
	276	{
	277	tU64 product = (tU64)(pLhsCur) rhs + carry;
	278	*pResultCur = (tU32)(product & 0xFFFFFFFF);
	279	carry = product >> 32;
	280	}
	281
	282	// if there is a remaining carry, grow the array
	283	if (carry != 0)
	284	{
	285	// grow the array
	286	RJ_ASSERT(lhs.m_length + 1 <= c_BigInt_MaxBlocks);
	287	*pResultCur = (tU32)carry;
	288	pResult->m_length = lhs.m_length + 1;
	289	}
	290	else
	291	{
	292	pResult->m_length = lhs.m_length;
	293	}
	294	}
	295
	296	//******************************************************************************
	297	// result = in * 2
	298	//******************************************************************************
	299	static void BigInt_Multiply2(tBigInt * pResult, const tBigInt &in)
	300	{
	301	// shift all the blocks by one
	302	tU32 carry = 0;
	303
	304	tU32 *pResultCur = pResult->m_blocks;
	305	const tU32 *pLhsCur = in.m_blocks;
	306	const tU32 *pLhsEnd = in.m_blocks + in.m_length;
	307	for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++pResultCur )
	308	{
	309	tU32 cur = *pLhsCur;
	310	*pResultCur = (cur << 1) \| carry;
	311	carry = cur >> 31;
	312	}
	313
	314	if (carry != 0)
	315	{
	316	// grow the array
	317	RJ_ASSERT(in.m_length + 1 <= c_BigInt_MaxBlocks);
	318	*pResultCur = carry;
	319	pResult->m_length = in.m_length + 1;
	320	}
	321	else
	322	{
	323	pResult->m_length = in.m_length;
	324	}
	325	}
	326
	327	//******************************************************************************
	328	// result = result * 2
	329	//******************************************************************************
	330	static void BigInt_Multiply2(tBigInt * pResult)
	331	{
	332	// shift all the blocks by one
	333	tU32 carry = 0;
	334
	335	tU32 *pCur = pResult->m_blocks;
	336	tU32 *pEnd = pResult->m_blocks + pResult->m_length;
	337	for ( ; pCur != pEnd; ++pCur )
	338	{
	339	tU32 cur = *pCur;
	340	*pCur = (cur << 1) \| carry;
	341	carry = cur >> 31;
	342	}
	343
	344	if (carry != 0)
	345	{
	346	// grow the array
	347	RJ_ASSERT(pResult->m_length + 1 <= c_BigInt_MaxBlocks);
	348	*pCur = carry;
	349	++pResult->m_length;
	350	}
	351	}
	352
	353	//******************************************************************************
	354	// result = result * 10
	355	//******************************************************************************
	356	static void BigInt_Multiply10(tBigInt * pResult)
	357	{
	358	// multiply all the blocks
	359	tU64 carry = 0;
	360
	361	tU32 *pCur = pResult->m_blocks;
	362	tU32 *pEnd = pResult->m_blocks + pResult->m_length;
	363	for ( ; pCur != pEnd; ++pCur )
	364	{
	365	tU64 product = (tU64)(pCur) 10ull + carry;
	366	(*pCur) = (tU32)(product & 0xFFFFFFFF);
	367	carry = product >> 32;
	368	}
	369
	370	if (carry != 0)
	371	{
	372	// grow the array
	373	RJ_ASSERT(pResult->m_length + 1 <= c_BigInt_MaxBlocks);
	374	*pCur = (tU32)carry;
	375	++pResult->m_length;
	376	}
	377	}
	378
	379	//******************************************************************************
	380	//******************************************************************************
	381	static tU32 g_PowerOf10_U32[] =
	382	{
	383	1, // 10 ^ 0
	384	10, // 10 ^ 1
	385	100, // 10 ^ 2
	386	1000, // 10 ^ 3
	387	10000, // 10 ^ 4
	388	100000, // 10 ^ 5
	389	1000000, // 10 ^ 6
	390	10000000, // 10 ^ 7
	391	};
	392
	393	//******************************************************************************
	394	// Note: This has a lot of wasted space in the big integer structures of the
	395	// early table entries. It wouldn't be terribly hard to make the multiply
	396	// function work on integer pointers with an array length instead of
	397	// the tBigInt struct which would allow us to store a minimal amount of
	398	// data here.
	399	//******************************************************************************
	400	static tBigInt g_PowerOf10_Big[] =
	401	{
	402	// 10 ^ 8
	403	{ 1, { 100000000 } },
	404	// 10 ^ 16
	405	{ 2, { 0x6fc10000, 0x002386f2 } },
	406	// 10 ^ 32
	407	{ 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },
	408	// 10 ^ 64
	409	{ 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, 0xe93ff9f4, 0x00184f03, } },
	410	// 10 ^ 128
	411	{ 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01, 0x03df9909, 0x0f1538fd,
	412	0x2374e42f, 0xd3cff5ec, 0xc404dc08, 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } },
	413	// 10 ^ 256
	414	{ 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	415	0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70,
	416	0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0,
	417	0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7, } }
	418	};
	419
	420	//******************************************************************************
	421	// result = 10^exponent
	422	//******************************************************************************
	423	static void BigInt_Pow10(tBigInt * pResult, tU32 exponent)
	424	{
	425	// make sure the exponent is within the bounds of the lookup table data
	426	RJ_ASSERT(exponent < 512);
	427
	428	// create two temporary values to reduce large integer copy operations
	429	tBigInt temp1;
	430	tBigInt temp2;
	431	tBigInt *pCurTemp = &temp1;
	432	tBigInt *pNextTemp = &temp2;
	433
	434	// initialize the result by looking up a 32-bit power of 10 corresponding to the first 3 bits
	435	tU32 smallExponent = exponent & 0x7;
	436	pCurTemp->SetU32(g_PowerOf10_U32[smallExponent]);
	437
	438	// remove the low bits that we used for the 32-bit lookup table
	439	exponent >>= 3;
	440	tU32 tableIdx = 0;
	441
	442	// while there are remaining bits in the exponent to be processed
	443	while (exponent != 0)
	444	{
	445	// if the current bit is set, multiply it with the corresponding power of 10
	446	if(exponent & 1)
	447	{
	448	// multiply into the next temporary
	449	BigInt_Multiply( pNextTemp, *pCurTemp, g_PowerOf10_Big[tableIdx] );
	450
	451	// swap to the next temporary
	452	tBigInt * pSwap = pCurTemp;
	453	pCurTemp = pNextTemp;
	454	pNextTemp = pSwap;
	455	}
	456
	457	// advance to the next bit
	458	++tableIdx;
	459	exponent >>= 1;
	460	}
	461
	462	// output the result
	463	pResult = pCurTemp;
	464	}
	465
	466	//******************************************************************************
	467	// result = in * 10^exponent
	468	//******************************************************************************
	469	static void BigInt_MultiplyPow10(tBigInt * pResult, const tBigInt & in, tU32 exponent)
	470	{
	471	// make sure the exponent is within the bounds of the lookup table data
	472	RJ_ASSERT(exponent < 512);
	473
	474	// create two temporary values to reduce large integer copy operations
	475	tBigInt temp1;
	476	tBigInt temp2;
	477	tBigInt *pCurTemp = &temp1;
	478	tBigInt *pNextTemp = &temp2;
	479
	480	// initialize the result by looking up a 32-bit power of 10 corresponding to the first 3 bits
	481	tU32 smallExponent = exponent & 0x7;
	482	if (smallExponent != 0)
	483	{
	484	BigInt_Multiply( pCurTemp, in, g_PowerOf10_U32[smallExponent] );
	485	}
	486	else
	487	{
	488	*pCurTemp = in;
	489	}
	490
	491	// remove the low bits that we used for the 32-bit lookup table
	492	exponent >>= 3;
	493	tU32 tableIdx = 0;
	494
	495	// while there are remaining bits in the exponent to be processed
	496	while (exponent != 0)
	497	{
	498	// if the current bit is set, multiply it with the corresponding power of 10
	499	if(exponent & 1)
	500	{
	501	// multiply into the next temporary
	502	BigInt_Multiply( pNextTemp, *pCurTemp, g_PowerOf10_Big[tableIdx] );
	503
	504	// swap to the next temporary
	505	tBigInt * pSwap = pCurTemp;
	506	pCurTemp = pNextTemp;
	507	pNextTemp = pSwap;
	508	}
	509
	510	// advance to the next bit
	511	++tableIdx;
	512	exponent >>= 1;
	513	}
	514
	515	// output the result
	516	pResult = pCurTemp;
	517	}
	518
	519	//******************************************************************************
	520	// result = 2^exponent
	521	//******************************************************************************
	522	static inline void BigInt_Pow2(tBigInt * pResult, tU32 exponent)
	523	{
	524	tU32 blockIdx = exponent / 32;
	525	RJ_ASSERT( blockIdx < c_BigInt_MaxBlocks );
	526
	527	for ( tU32 i = 0; i <= blockIdx; ++i)
	528	pResult->m_blocks[i] = 0;
	529
	530	pResult->m_length = blockIdx + 1;
	531
	532	tU32 bitIdx = (exponent % 32);
	533	pResult->m_blocks[blockIdx] \|= (1 << bitIdx);
	534	}
	535
	536	//******************************************************************************
	537	// This function will divide two large numbers under the assumption that the
	538	// result is within the range [0,10) and the input numbers have been shifted
	539	// to satisfy:
	540	// - The highest block of the divisor is greater than or equal to 8 such that
	541	// there is enough precision to make an accurate first guess at the quotient.
	542	// - The highest block of the divisor is less than the maximum value on an
	543	// unsigned 32-bit integer such that we can safely increment without overflow.
	544	// - The dividend does not contain more blocks than the divisor such that we
	545	// can estimate the quotient by dividing the equivalently placed high blocks.
	546	//
	547	// quotient = floor(dividend / divisor)
	548	// remainder = dividend - quotient*divisor
	549	//
	550	// pDividend is updated to be the remainder and the quotient is returned.
	551	//******************************************************************************
	552	static tU32 BigInt_DivideWithRemainder_MaxQuotient9(tBigInt * pDividend, const tBigInt & divisor)
	553	{
	554	// Check that the divisor has been correctly shifted into range and that it is not
	555	// smaller than the dividend in length.
	556	RJ_ASSERT( !divisor.IsZero() &&
	557	divisor.m_blocks[divisor.m_length-1] >= 8 &&
	558	divisor.m_blocks[divisor.m_length-1] < 0xFFFFFFFF &&
	559	pDividend->m_length <= divisor.m_length );
	560
	561	// If the dividend is smaller than the divisor, the quotient is zero and the divisor is already
	562	// the remainder.
	563	tU32 length = divisor.m_length;
	564	if (pDividend->m_length < divisor.m_length)
	565	return 0;
	566
	567	const tU32 * pFinalDivisorBlock = divisor.m_blocks + length - 1;
	568	tU32 * pFinalDividendBlock = pDividend->m_blocks + length - 1;
	569
	570	// Compute an estimated quotient based on the high block value. This will either match the actual quotient or
	571	// undershoot by one.
	572	tU32 quotient = pFinalDividendBlock / (pFinalDivisorBlock + 1);
	573	RJ_ASSERT(quotient <= 9);
	574
	575	// Divide out the estimated quotient
	576	if (quotient != 0)
	577	{
	578	// dividend = dividend - divisor*quotient
	579	const tU32 *pDivisorCur = divisor.m_blocks;
	580	tU32 *pDividendCur = pDividend->m_blocks;
	581
	582	tU64 borrow = 0;
	583	tU64 carry = 0;
	584	do
	585	{
	586	tU64 product = (tU64)pDivisorCur (tU64)quotient + carry;
	587	carry = product >> 32;
	588
	589	tU64 difference = (tU64)*pDividendCur - (product & 0xFFFFFFFF) - borrow;
	590	borrow = (difference >> 32) & 1;
	591
	592	*pDividendCur = difference & 0xFFFFFFFF;
	593
	594	++pDivisorCur;
	595	++pDividendCur;
	596	} while(pDivisorCur <= pFinalDivisorBlock);
	597
	598	// remove all leading zero blocks from dividend
	599	while (length > 0 && pDividend->m_blocks[length - 1] == 0)
	600	--length;
	601
	602	pDividend->m_length = length;
	603	}
	604
	605	// If the dividend is still larger than the divisor, we overshot our estimate quotient. To correct,
	606	// we increment the quotient and subtract one more divisor from the dividend.
	607	if ( BigInt_Compare(*pDividend, divisor) >= 0 )
	608	{
	609	++quotient;
	610
	611	// dividend = dividend - divisor
	612	const tU32 *pDivisorCur = divisor.m_blocks;
	613	tU32 *pDividendCur = pDividend->m_blocks;
	614
	615	tU64 borrow = 0;
	616	do
	617	{
	618	tU64 difference = (tU64)pDividendCur - (tU64)pDivisorCur - borrow;
	619	borrow = (difference >> 32) & 1;
	620
	621	*pDividendCur = difference & 0xFFFFFFFF;
	622
	623	++pDivisorCur;
	624	++pDividendCur;
	625	} while(pDivisorCur <= pFinalDivisorBlock);
	626
	627	// remove all leading zero blocks from dividend
	628	while (length > 0 && pDividend->m_blocks[length - 1] == 0)
	629	--length;
	630
	631	pDividend->m_length = length;
	632	}
	633
	634	return quotient;
	635	}
	636
	637	//******************************************************************************
	638	// result = result << shift
	639	//******************************************************************************
	640	static void BigInt_ShiftLeft(tBigInt * pResult, tU32 shift)
	641	{
	642	RJ_ASSERT( shift != 0 );
	643
	644	tU32 shiftBlocks = shift / 32;
	645	tU32 shiftBits = shift % 32;
	646
	647	// process blocks high to low so that we can safely process in place
	648	const tU32 * pInBlocks = pResult->m_blocks;
	649	tS32 inLength = pResult->m_length;
	650	RJ_ASSERT( inLength + shiftBlocks <= c_BigInt_MaxBlocks );
	651
	652	// check if the shift is block aligned
	653	if (shiftBits == 0)
	654	{
	655	// copy blocks from high to low
	656	for (tU32 * pInCur = pResult->m_blocks + inLength - 1, *pOutCur = pInCur + shiftBlocks;
	657	pInCur >= pInBlocks;
	658	--pInCur, --pOutCur)
	659	{
	660	pOutCur = pInCur;
	661	}
	662
	663	// zero the remaining low blocks
	664	for ( tU32 i = 0; i < shiftBlocks; ++i)
	665	pResult->m_blocks[i] = 0;
	666
	667	pResult->m_length += shiftBlocks;
	668	}
	669	// else we need to shift partial blocks
	670	else
	671	{
	672	tS32 inBlockIdx = inLength - 1;
	673	tU32 outBlockIdx = inLength + shiftBlocks;
	674
	675	// set the length to hold the shifted blocks
	676	RJ_ASSERT( outBlockIdx < c_BigInt_MaxBlocks );
	677	pResult->m_length = outBlockIdx + 1;
	678
	679	// output the initial blocks
	680	const tU32 lowBitsShift = (32 - shiftBits);
	681	tU32 highBits = 0;
	682	tU32 block = pResult->m_blocks[inBlockIdx];
	683	tU32 lowBits = block >> lowBitsShift;
	684	while ( inBlockIdx > 0 )
	685	{
	686	pResult->m_blocks[outBlockIdx] = highBits \| lowBits;
	687	highBits = block << shiftBits;
	688
	689	--inBlockIdx;
	690	--outBlockIdx;
	691
	692	block = pResult->m_blocks[inBlockIdx];
	693	lowBits = block >> lowBitsShift;
	694	}
	695
	696	// output the final blocks
	697	RJ_ASSERT( outBlockIdx == shiftBlocks + 1 );
	698	pResult->m_blocks[outBlockIdx] = highBits \| lowBits;
	699	pResult->m_blocks[outBlockIdx-1] = block << shiftBits;
	700
	701	// zero the remaining low blocks
	702	for ( tU32 i = 0; i < shiftBlocks; ++i)
	703	pResult->m_blocks[i] = 0;
	704
	705	// check if the terminating block has no set bits
	706	if (pResult->m_blocks[pResult->m_length - 1] == 0)
	707	--pResult->m_length;
	708	}
	709	}
	710
	711	//******************************************************************************
	712	// This is an implementation the Dragon4 algorithm to convert a binary number
	713	// in floating point format to a decimal number in string format. The function
	714	// returns the number of digits written to the output buffer and the output is
	715	// not NUL terminated.
	716	//
	717	// The floating point input value is (mantissa * 2^exponent).
	718	//
	719	// See the following papers for more information on the algorithm:
	720	// "How to Print Floating-Point Numbers Accurately"
	721	// Steele and White
	722	// http://kurtstephens.com/files/p372-steele.pdf
	723	// "Printing Floating-Point Numbers Quickly and Accurately"
	724	// Burger and Dybvig
	725	// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656&rep=rep1&type=pdf
	726	//******************************************************************************
	727	tU32 Dragon4
	728	(
	729	const tU64 mantissa, // value significand
	730	const tS32 exponent, // value exponent in base 2
	731	const tU32 mantissaHighBitIdx, // index of the highest set mantissa bit
	732	const tB hasUnequalMargins, // is the high margin twice as large as the low margin
	733	const tCutoffMode cutoffMode, // how to determine output length
	734	tU32 cutoffNumber, // parameter to the selected cutoffMode
	735	tC8 * pOutBuffer, // buffer to output into
	736	tU32 bufferSize, // maximum characters that can be printed to pOutBuffer
	737	tS32 * pOutExponent // the base 10 exponent of the first digit
	738	)
	739	{
	740	tC8 * pCurDigit = pOutBuffer;
	741
	742	RJ_ASSERT( bufferSize > 0 );
	743
	744	// if the mantissa is zero, the value is zero regardless of the exponent
	745	if (mantissa == 0)
	746	{
	747	*pCurDigit = '0';
	748	*pOutExponent = 0;
	749	return 1;
	750	}
	751
	752	// compute the initial state in integral form such that
	753	// value = scaledValue / scale
	754	// marginLow = scaledMarginLow / scale
	755	tBigInt scale; // positive scale applied to value and margin such that they can be
	756	// represented as whole numbers
	757	tBigInt scaledValue; // scale * mantissa
	758	tBigInt scaledMarginLow; // scale * 0.5 * (distance between this floating-point number and its
	759	// immediate lower value)
	760
	761	// For normalized IEEE floating point values, each time the exponent is incremented the margin also
	762	// doubles. That creates a subset of transition numbers where the high margin is twice the size of
	763	// the low margin.
	764	tBigInt * pScaledMarginHigh;
	765	tBigInt optionalMarginHigh;
	766
	767	if ( hasUnequalMargins )
	768	{
	769	// if we have no fractional component
	770	if (exponent > 0)
	771	{
	772	// 1) Expand the input value by multiplying out the mantissa and exponent. This represents
	773	// the input value in its whole number representation.
	774	// 2) Apply an additional scale of 2 such that later comparisons against the margin values
	775	// are simplified.
	776	// 3) Set the margin value to the lowest mantissa bit's scale.
	777
	778	// scaledValue = 2 * 2 * mantissa*2^exponent
	779	scaledValue.SetU64( 4 * mantissa );
	780	BigInt_ShiftLeft( &scaledValue, exponent );
	781
	782	// scale = 2 * 2 * 1
	783	scale.SetU32( 4 );
	784
	785	// scaledMarginLow = 2 * 2^(exponent-1)
	786	BigInt_Pow2( &scaledMarginLow, exponent );
	787
	788	// scaledMarginHigh = 2 * 2 * 2^(exponent-1)
	789	BigInt_Pow2( &optionalMarginHigh, exponent + 1 );
	790	}
	791	// else we have a fractional exponent
	792	else
	793	{
	794	// In order to track the mantissa data as an integer, we store it as is with a large scale
	795
	796	// scaledValue = 2 * 2 * mantissa
	797	scaledValue.SetU64( 4 * mantissa );
	798
	799	// scale = 2 * 2 * 2^(-exponent)
	800	BigInt_Pow2(&scale, -exponent + 2 );
	801
	802	// scaledMarginLow = 2 * 2^(-1)
	803	scaledMarginLow.SetU32( 1 );
	804
	805	// scaledMarginHigh = 2 * 2 * 2^(-1)
	806	optionalMarginHigh.SetU32( 2 );
	807	}
	808
	809	// the high and low margins are different
	810	pScaledMarginHigh = &optionalMarginHigh;
	811	}
	812	else
	813	{
	814	// if we have no fractional component
	815	if (exponent > 0)
	816	{
	817	// 1) Expand the input value by multiplying out the mantissa and exponent. This represents
	818	// the input value in its whole number representation.
	819	// 2) Apply an additional scale of 2 such that later comparisons against the margin values
	820	// are simplified.
	821	// 3) Set the margin value to the lowest mantissa bit's scale.
	822
	823	// scaledValue = 2 * mantissa*2^exponent
	824	scaledValue.SetU64( 2 * mantissa );
	825	BigInt_ShiftLeft( &scaledValue, exponent );
	826
	827	// scale = 2 * 1
	828	scale.SetU32( 2 );
	829
	830	// scaledMarginLow = 2 * 2^(exponent-1)
	831	BigInt_Pow2( &scaledMarginLow, exponent );
	832	}
	833	// else we have a fractional exponent
	834	else
	835	{
	836	// In order to track the mantissa data as an integer, we store it as is with a large scale
	837
	838	// scaledValue = 2 * mantissa
	839	scaledValue.SetU64( 2 * mantissa );
	840
	841	// scale = 2 * 2^(-exponent)
	842	BigInt_Pow2(&scale, -exponent + 1 );
	843
	844	// scaledMarginLow = 2 * 2^(-1)
	845	scaledMarginLow.SetU32( 1 );
	846	}
	847
	848	// the high and low margins are equal
	849	pScaledMarginHigh = &scaledMarginLow;
	850	}
	851
	852	// Compute an estimate for digitExponent that will be correct or undershoot by one.
	853	// This optimization is based on the paper "Printing Floating-Point Numbers Quickly and Accurately"
	854	// by Burger and Dybvig http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656&rep=rep1&type=pdf
	855	// We perform an additional subtraction of 0.69 to increase the frequency of a failed estimate
	856	// because that lets us take a faster branch in the code. 0.69 is chosen because 0.69 + log10(2) is
	857	// less than one by a reasonable epsilon that will account for any floating point error.
	858	//
	859	// We want to set digitExponent to floor(log10(v)) + 1
	860	// v = mantissa*2^exponent
	861	// log2(v) = log2(mantissa) + exponent;
	862	// log10(v) = log2(v) * log10(2)
	863	// floor(log2(v)) = mantissaHighBitIdx + exponent;
	864	// log10(v) - log10(2) < (mantissaHighBitIdx + exponent) * log10(2) <= log10(v)
	865	// log10(v) < (mantissaHighBitIdx + exponent) * log10(2) + log10(2) <= log10(v) + log10(2)
	866	// floor( log10(v) ) < ceil( (mantissaHighBitIdx + exponent) * log10(2) ) <= floor( log10(v) ) + 1
	867	const tF64 log10_2 = 0.30102999566398119521373889472449;
[834]	868	tS32 digitExponent = (tS32)(ceil(tF64((tS32)mantissaHighBitIdx + exponent) * log10_2 - 0.69));
[823]	869
	870	// if the digit exponent is smaller than the smallest desired digit for fractional cutoff,
	871	// pull the digit back into legal range at which point we will round to the appropriate value.
	872	// Note that while our value for digitExponent is still an estimate, this is safe because it
	873	// only increases the number. This will either correct digitExponent to an accurate value or it
	874	// will clamp it above the accurate value.
	875	if (cutoffMode == CutoffMode_FractionLength && digitExponent <= -(tS32)cutoffNumber)
	876	{
	877	digitExponent = -(tS32)cutoffNumber + 1;
	878	}
	879
	880	// Divide value by 10^digitExponent.
	881	if (digitExponent > 0)
	882	{
	883	// The exponent is positive creating a division so we multiply up the scale.
	884	tBigInt temp;
	885	BigInt_MultiplyPow10( &temp, scale, digitExponent );
	886	scale = temp;
	887	}
	888	else if (digitExponent < 0)
	889	{
	890	// The exponent is negative creating a multiplication so we multiply up the scaledValue,
	891	// scaledMarginLow and scaledMarginHigh.
	892	tBigInt pow10;
	893	BigInt_Pow10( &pow10, -digitExponent);
	894
	895	tBigInt temp;
	896	BigInt_Multiply( &temp, scaledValue, pow10);
	897	scaledValue = temp;
	898
	899	BigInt_Multiply( &temp, scaledMarginLow, pow10);
	900	scaledMarginLow = temp;
	901
	902	if (pScaledMarginHigh != &scaledMarginLow)
	903	BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
	904	}
	905
	906	// If (value >= 1), our estimate for digitExponent was too low
	907	if( BigInt_Compare(scaledValue,scale) >= 0 )
	908	{
	909	// The exponent estimate was incorrect.
	910	// Increment the exponent and don't perform the premultiply needed
	911	// for the first loop iteration.
	912	digitExponent = digitExponent + 1;
	913	}
	914	else
	915	{
	916	// The exponent estimate was correct.
	917	// Multiply larger by the output base to prepare for the first loop iteration.
	918	BigInt_Multiply10( &scaledValue );
	919	BigInt_Multiply10( &scaledMarginLow );
	920	if (pScaledMarginHigh != &scaledMarginLow)
	921	BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
	922	}
	923
	924	// Compute the cutoff exponent (the exponent of the final digit to print).
	925	// Default to the maximum size of the output buffer.
	926	tS32 cutoffExponent = digitExponent - bufferSize;
	927	switch(cutoffMode)
	928	{
	929	// print digits until we pass the accuracy margin limits or buffer size
	930	case CutoffMode_Unique:
	931	break;
	932
	933	// print cutoffNumber of digits or until we reach the buffer size
	934	case CutoffMode_TotalLength:
	935	{
	936	tS32 desiredCutoffExponent = digitExponent - (tS32)cutoffNumber;
	937	if (desiredCutoffExponent > cutoffExponent)
	938	cutoffExponent = desiredCutoffExponent;
	939	}
	940	break;
	941
	942	// print cutoffNumber digits past the decimal point or until we reach the buffer size
	943	case CutoffMode_FractionLength:
	944	{
	945	tS32 desiredCutoffExponent = -(tS32)cutoffNumber;
	946	if (desiredCutoffExponent > cutoffExponent)
	947	cutoffExponent = desiredCutoffExponent;
	948	}
	949	break;
	950	}
	951
	952	// Output the exponent of the first digit we will print
	953	*pOutExponent = digitExponent-1;
	954
	955	// In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(),
	956	// we need to scale up our values such that the highest block of the denominator
	957	// is greater than or equal to 8. We also need to guarantee that the numerator
	958	// can never have a length greater than the denominator after each loop iteration.
	959	// This requires the highest block of the denominator to be less than or equal to
	960	// 429496729 which is the highest number that can be multiplied by 10 without
	961	// overflowing to a new block.
	962	RJ_ASSERT( scale.GetLength() > 0 );
	963	tU32 hiBlock = scale.GetBlock( scale.GetLength() - 1 );
	964	if (hiBlock < 8 \|\| hiBlock > 429496729)
	965	{
	966	// Perform a bit shift on all values to get the highest block of the denominator into
	967	// the range [8,429496729]. We are more likely to make accurate quotient estimations
	968	// in BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator values so
	969	// we shift the denominator to place the highest bit at index 27 of the highest block.
	970	// This is safe because (2^28 - 1) = 268435455 which is less than 429496729. This means
	971	// that all values with a highest bit at index 27 are within range.
[834]	972	tU32 hiBlockLog2 = LogBase2(hiBlock);
[823]	973	RJ_ASSERT(hiBlockLog2 < 3 \|\| hiBlockLog2 > 27);
	974	tU32 shift = (32 + 27 - hiBlockLog2) % 32;
	975
	976	BigInt_ShiftLeft( &scale, shift );
	977	BigInt_ShiftLeft( &scaledValue, shift);
	978	BigInt_ShiftLeft( &scaledMarginLow, shift);
	979	if (pScaledMarginHigh != &scaledMarginLow)
	980	BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
	981	}
	982
	983	// These values are used to inspect why the print loop terminated so we can properly
	984	// round the final digit.
	985	tB low; // did the value get within marginLow distance from zero
	986	tB high; // did the value get within marginHigh distance from one
	987	tU32 outputDigit; // current digit being output
	988
	989	if (cutoffMode == CutoffMode_Unique)
	990	{
	991	// For the unique cutoff mode, we will try to print until we have reached a level of
	992	// precision that uniquely distinguishes this value from its neighbors. If we run
	993	// out of space in the output buffer, we terminate early.
	994	for (;;)
	995	{
	996	digitExponent = digitExponent-1;
	997
	998	// divide out the scale to extract the digit
	999	outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, scale);
	1000	RJ_ASSERT( outputDigit < 10 );
	1001
	1002	// update the high end of the value
	1003	tBigInt scaledValueHigh;
	1004	BigInt_Add( &scaledValueHigh, scaledValue, *pScaledMarginHigh );
	1005
	1006	// stop looping if we are far enough away from our neighboring values
	1007	// or if we have reached the cutoff digit
	1008	low = BigInt_Compare(scaledValue, scaledMarginLow) < 0;
	1009	high = BigInt_Compare(scaledValueHigh, scale) > 0;
	1010	if (low \| high \| (digitExponent == cutoffExponent))
	1011	break;
	1012
	1013	// store the output digit
	1014	*pCurDigit = (tC8)('0' + outputDigit);
	1015	++pCurDigit;
	1016
	1017	// multiply larger by the output base
	1018	BigInt_Multiply10( &scaledValue );
	1019	BigInt_Multiply10( &scaledMarginLow );
	1020	if (pScaledMarginHigh != &scaledMarginLow)
	1021	BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
	1022	}
	1023	}
	1024	else
	1025	{
	1026	// For length based cutoff modes, we will try to print until we
	1027	// have exhausted all precision (i.e. all remaining digits are zeros) or
	1028	// until we reach the desired cutoff digit.
	1029	low = false;
	1030	high = false;
	1031
	1032	for (;;)
	1033	{
	1034	digitExponent = digitExponent-1;
	1035
	1036	// divide out the scale to extract the digit
	1037	outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, scale);
	1038	RJ_ASSERT( outputDigit < 10 );
	1039
	1040	if ( scaledValue.IsZero() \| (digitExponent == cutoffExponent) )
	1041	break;
	1042
	1043	// store the output digit
	1044	*pCurDigit = (tC8)('0' + outputDigit);
	1045	++pCurDigit;
	1046
	1047	// multiply larger by the output base
	1048	BigInt_Multiply10(&scaledValue);
	1049	}
	1050	}
	1051
	1052	// round off the final digit
	1053	// default to rounding down if value got too close to 0
	1054	tB roundDown = low;
	1055
	1056	// if it is legal to round up and down
	1057	if (low == high)
	1058	{
	1059	// round to the closest digit by comparing value with 0.5. To do this we need to convert
	1060	// the inequality to large integer values.
	1061	// compare( value, 0.5 )
	1062	// compare( scale * value, scale * 0.5 )
	1063	// compare( 2 * scale * value, scale )
	1064	BigInt_Multiply2(&scaledValue);
	1065	tS32 compare = BigInt_Compare(scaledValue, scale);
	1066	roundDown = compare < 0;
	1067
	1068	// if we are directly in the middle, round towards the even digit (i.e. IEEE rouding rules)
	1069	if (compare == 0)
	1070	roundDown = (outputDigit & 1) == 0;
	1071	}
	1072
	1073	// print the rounded digit
	1074	if (roundDown)
	1075	{
	1076	*pCurDigit = (tC8)('0' + outputDigit);
	1077	++pCurDigit;
	1078	}
	1079	else
	1080	{
	1081	// handle rounding up
	1082	if (outputDigit == 9)
	1083	{
	1084	// find the first non-nine prior digit
	1085	for (;;)
	1086	{
	1087	// if we are at the first digit
	1088	if (pCurDigit == pOutBuffer)
	1089	{
	1090	// output 1 at the next highest exponent
	1091	*pCurDigit = '1';
	1092	++pCurDigit;
	1093	*pOutExponent += 1;
	1094	break;
	1095	}
	1096
	1097	--pCurDigit;
	1098	if (*pCurDigit != '9')
	1099	{
	1100	// increment the digit
	1101	*pCurDigit += 1;
	1102	++pCurDigit;
	1103	break;
	1104	}
	1105	}
	1106	}
	1107	else
	1108	{
	1109	// values in the range [0,8] can perform a simple round up
	1110	*pCurDigit = (tC8)('0' + outputDigit + 1);
	1111	++pCurDigit;
	1112	}
	1113	}
	1114
	1115	// return the number of digits output
	1116	tU32 outputLen = (tU32)(pCurDigit - pOutBuffer);
	1117	RJ_ASSERT(outputLen <= bufferSize);
	1118	return outputLen;
	1119	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: cpp/PrintFloat/Dragon4.cpp @ 1038

Download in other formats: