annotate tomsfastmath/tfm.tex @ 646:f10335e5e42f dropbear-tfm

- More asm constraint fixes. Now seems to build OK on 32-bit OS X.
author Matt Johnston <matt@ucc.asn.au>
date Wed, 30 Nov 2011 23:03:47 +0800
parents a362b62d38b2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
643
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
1 \documentclass[b5paper]{book}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
2 \usepackage{hyperref}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
3 \usepackage{makeidx}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
4 \usepackage{amssymb}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
5 \usepackage{color}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
6 \usepackage{alltt}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
7 \usepackage{graphicx}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
8 \usepackage{layout}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
9 \def\union{\cup}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
10 \def\intersect{\cap}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
11 \def\getsrandom{\stackrel{\rm R}{\gets}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
12 \def\cross{\times}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
13 \def\cat{\hspace{0.5em} \| \hspace{0.5em}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
14 \def\catn{$\|$}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
15 \def\divides{\hspace{0.3em} | \hspace{0.3em}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
16 \def\nequiv{\not\equiv}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
17 \def\approx{\raisebox{0.2ex}{\mbox{\small $\sim$}}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
18 \def\lcm{{\rm lcm}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
19 \def\gcd{{\rm gcd}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
20 \def\log{{\rm log}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
21 \def\ord{{\rm ord}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
22 \def\abs{{\mathit abs}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
23 \def\rep{{\mathit rep}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
24 \def\mod{{\mathit\ mod\ }}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
25 \renewcommand{\pmod}[1]{\ ({\rm mod\ }{#1})}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
26 \newcommand{\floor}[1]{\left\lfloor{#1}\right\rfloor}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
27 \newcommand{\ceil}[1]{\left\lceil{#1}\right\rceil}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
28 \def\Or{{\rm\ or\ }}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
29 \def\And{{\rm\ and\ }}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
30 \def\iff{\hspace{1em}\Longleftrightarrow\hspace{1em}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
31 \def\implies{\Rightarrow}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
32 \def\undefined{{\rm ``undefined"}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
33 \def\Proof{\vspace{1ex}\noindent {\bf Proof:}\hspace{1em}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
34 \let\oldphi\phi
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
35 \def\phi{\varphi}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
36 \def\Pr{{\rm Pr}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
37 \newcommand{\str}[1]{{\mathbf{#1}}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
38 \def\F{{\mathbb F}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
39 \def\N{{\mathbb N}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
40 \def\Z{{\mathbb Z}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
41 \def\R{{\mathbb R}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
42 \def\C{{\mathbb C}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
43 \def\Q{{\mathbb Q}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
44 \definecolor{DGray}{gray}{0.5}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
45 \newcommand{\emailaddr}[1]{\mbox{$<${#1}$>$}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
46 \def\twiddle{\raisebox{0.3ex}{\mbox{\tiny $\sim$}}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
47 \def\gap{\vspace{0.5ex}}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
48 \makeindex
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
49 \begin{document}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
50 \frontmatter
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
51 \pagestyle{empty}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
52 \title{TomsFastMath User Manual \\ v0.12}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
53 \author{Tom St Denis \\ [email protected]}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
54 \maketitle
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
55 This text and library are all hereby placed in the public domain. This book has been formatted for B5
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
56 [176x250] paper using the \LaTeX{} {\em book} macro package.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
57
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
58 \vspace{13cm}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
59
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
60 \begin{flushleft}This project was sponsored in part by
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
61
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
62 Secure Science Corporation \url{http://www.securescience.net}.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
63 \end{flushleft}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
64
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
65 \tableofcontents
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
66 \listoffigures
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
67 \mainmatter
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
68 \pagestyle{headings}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
69 \chapter{Introduction}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
70 \section{What is TomsFastMath?}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
71
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
72 TomsFastMath is meant to be a very fast yet still fairly portable and easy to port large
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
73 integer arithmetic library written in ISO C. The goal specifically is to be able to perform
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
74 very fast modular exponentiations and other related functions required for ECC, DH and RSA
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
75 cryptosystems.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
76
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
77 Most of the library is pure ISO C portable source code while a small portion (three files) contain
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
78 a mixture of ISO C and assembler inline fragments. Compared to LibTomMath this new library is
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
79 meant to be much faster while sacrificing flexibiltiy. This is accomplished through several means.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
80
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
81 \begin{enumerate}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
82 \item The new code is slightly messier and contains asm blocks.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
83 \item This uses fixed not multiple precision integers.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
84 \item It is designed only for fast modular exponentiations [e.g. less flexibility].
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
85 \end{enumerate}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
86
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
87 To mitigate some of the problems that arise from using assembler it has been carefully and
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
88 appropriately used where it would make the most gain in performance. Also we use macro's
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
89 for assembler code which allows new ports to be inserted easily.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
90
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
91 The new code uses fixed precision arithmetic which means at compile time you choose a maximum
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
92 precision and all numbers are limited to that. This has the benefit of not requiring any
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
93 memory heap operations (which are slow) in any of the functions. It has the downside that
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
94 integers that are too large are truncated.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
95
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
96 The goal of this library is to be able to perform modular exponentiations (with an odd modulus) very
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
97 fast. This is what takes the most time in systems such as RSA and DH. This also requires
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
98 fast multiplication and squaring and has the side effect of speeding up ECC operations as well.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
99
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
100 \section{License}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
101 TomsFastMath is public domain.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
102
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
103 \section{Building}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
104 To build the library simply type ``make''. Or to install in typical *unix like directories use
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
105 ``make install''. Similarly a shared library can be built with ``make -f makefile.shared install''.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
106
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
107 You can build the test program with ``make test''. To perform simple static testing (useful to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
108 test out new assembly ports) use the stest program. Type ``make stest'' and run it on your
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
109 target. The program will perform three multiplications, squarings and montgomery reductions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
110 Likely if your assembly code is invalid this code will exhibit the bug.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
111
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
112 \subsection{Intel CC}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
113 In theory you should be able to build the library with
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
114
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
115 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
116 CFLAGS="-O3 -ip" CC=icc make IGNORE_SPEED=1
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
117 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
118
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
119 However, Intels inline assembler is way less advanced than GCCs. As a result it doesn't compile.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
120 Fortunately it doesn't really matter.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
121
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
122 \subsection{MSVC}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
123 The library doesn't build with MSVC. Imagine that.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
124
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
125 \subsection{Build Limitations}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
126 TomsFastMath has the following build requirements which are non--portable but under most
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
127 circumstances not problematic.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
128
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
129 \begin{enumerate}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
130 \item ``CHAR\_BIT'' must be eight.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
131 \item The ``fp\_digit'' type must be a multiple of eight bits long.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
132 \item The ``fp\_word'' must be at least twice the length of fp\_digit.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
133 \end{enumerate}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
134
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
135 \subsection{Optimization Configuration}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
136 By default TFM is configured for 32--bit digits using ISO C source code. This mode while portable
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
137 is not very efficient. While building the library (from scratch) you can define one of
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
138 several ``CFLAGS'' defines.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
139
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
140 For example, to build with with SSE2 optimizations type
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
141
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
142 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
143 CFLAGS=-DTFM_SSE2 make clean libtfm.a
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
144 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
145
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
146 \subsubsection{x86--32} The ``x86--32'' mode is defined by ``TFM\_X86'' and covers all
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
147 i386 and beyond processors. It requires GCC to build and only works with 32--bit digits. In this
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
148 mode fp\_digit is 32--bits and fp\_word is 64--bits. This mode will be autodetected when building
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
149 with GCC to an ``i386'' target. You can override this behaviour by defining TFM\_NO\_ASM or
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
150 another optimization mode (such as SSE2).
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
151
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
152 \subsubsection{SSE2} The ``SSE2'' mode is defined by ``TFM\_SSE2'' and requires a Pentium 4, Pentium
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
153 M or Athlon64 processor. It requires GCC to build. Note that you shouldn't define both
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
154 TFM\_X86 and TFM\_SSE2 at the same time. This mode only works with 32--bit digits. In this
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
155 mode fp\_digit is 32--bits and fp\_word is 64--bits. While this mode will work on the AMD Athlon64
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
156 series of processors it is less efficient than the native ``x86--64'' mode and not recommended.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
157
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
158 There is an additional ``TFM\_PRESCOTT'' flag that you can define for P4 Prescott processors. This causes
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
159 the mul/sqr functions to use x86\_32 and the montgomery reduction to use SSE2 which is (so far) the fastest
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
160 combination. If you are using an older (e.g. Northwood) generation P4 don't define this.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
161
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
162 \subsubsection{x86--64} The ``x86--64'' mode is defined by ``TFM\_X86\_64'' and requires a
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
163 ``x86--64'' capable processor (Athlon64 and future Pentium processors). It requires GCC to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
164 build and only works with 64--bit digits. Note that by enabling this mode it will automatically
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
165 enable 64--bit digits. In this mode fp\_digit is 64--bits and fp\_word is 128--bits. This mode will
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
166 be autodetected when building with GCC to an ``x86--64'' target. You can override this behaviour by defining
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
167 TFM\_NO\_ASM.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
168
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
169 \subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires a ARMv4 with the M instructions (enhanced
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
170 multipliers) or higher processor. It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
171 fp\_word is 64--bits.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
172
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
173 \subsubsection{PPC32} The ``PPC32'' mode is defined by ``TFM\_PPC32'' and requires a standard PPC processor. It doesn't
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
174 use altivec or other extensions so it should work on all compliant implementations of PPC. It requires GCC and works
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
175 with 32--bit digits. In this mode fp\_digit is 32--bits and fp\_word is 64--bits.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
176
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
177 \subsubsection{PPC64} The ``PPC64'' mode is defined by ``TFM\_PPC64'' and requires a 64--bit PPC processor.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
178
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
179 \subsubsection{AVR32} The ``AVR32'' mode is defined by ``TFM\_AVR32'' and requires an Atmel AVR32 processor.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
180
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
181 \subsubsection{Future Releases} Future releases will support additional platform optimizations.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
182 Developers of MIPS and SPARC platforms are encouraged to submit GCC asm inline patches
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
183 (see chapter \ref{chap:asmops} for more information).
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
184
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
185 \begin{figure}[here]
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
186 \begin{small}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
187 \begin{center}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
188 \begin{tabular}{|l|l|}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
189 \hline \textbf{Processor} & \textbf{Recommended Mode} \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
190 \hline All 32--bit x86 platforms & TFM\_X86 \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
191 \hline Pentium 4 & TFM\_SSE2 \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
192 \hline Pentium 4 Prescott & TFM\_SSE2 + TFM\_PRESCOTT \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
193 \hline Athlon64 & TFM\_X86\_64 \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
194 \hline ARMv4 or higher with M & TFM\_ARM \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
195 \hline G3/G4 (32-bit PPC) & TFM\_PPC32 \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
196 \hline G5 (64-bit PPC) & TFM\_PPC64 \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
197 \hline Atmel AVR32 & TFM\_AVR32 \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
198 \hline &\\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
199 \hline x86--32 or x86--64 (with GCC) & Leave blank and let autodetect work \\
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
200 \hline
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
201 \end{tabular}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
202 \caption{Recommended Build Modes}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
203 \end{center}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
204 \end{small}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
205 \end{figure}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
206
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
207 \subsection{Build Configurations}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
208 TomsFastMath is configurable in terms of which unrolled code (if any) is included. By default, the majority of the code is included which
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
209 results in large binaries. The first flag to try out is TFM\_ALREADY\_SET which tells TFM to turn off \textbf{all} unrolled code. This will
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
210 result in a smaller library but also a much slower library.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
211
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
212 From this clean state, you can start enabling unrolled code for given cryptographic tasks at hand. A series of TFM\_MULXYZ and TFM\_SQRXYZ macros
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
213 exist to enable specific unrolled code. For instance, TFM\_MUL32 will enable a 32 digit unrolled multiplier. For a complete list see the tfm.h header
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
214 file. Keep in mind this is for digits not bits. For example, you should enable TFM\_MUL16 if you are doing 1024-bit exptmods on a 64--bit platform, enable
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
215 TFM\_MUL32 on 32--bit platforms.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
216
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
217 To help developers use ECC there are a set of defines for the five NIST curve sizes. They are named TFM\_ECCXYZ where XYZ is one of 192, 224, 256, 384, or 521. These
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
218 enable the multipliers and squaring code for a given curve, autodetecting 64--bit platforms as well.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
219
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
220 \subsection{Precision Configuration}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
221 The precision of all integers in this library are fixed to a limited precision. Essentially
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
222 the rule of setting the precision is if you plan on doing modular exponentiation with $k$--bit
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
223 numbers than the precision must be fixed to $2k$--bits plus four digits.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
224
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
225 This is changed by altering the value of ``FP\_MAX\_SIZE'' in tfm.h to your desired size. By default,
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
226 the library is configured to handle upto 2048--bit inputs to the modular exponentiator.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
227
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
228 \chapter{Getting Started}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
229 \section{Data Types}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
230 TomsFastMath is a large fixed precision integer library. It provides the functionality to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
231 manipulate large signed integers through a relatively trivial api and a single data type.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
232
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
233 The ``fp\_int'' or fixed precision integer is the data type that the functions operate with.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
234
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
235 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
236 typedef struct {
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
237 fp_digit dp[FP_SIZE];
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
238 int used,
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
239 sign;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
240 } fp_int;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
241 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
242
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
243 The \textbf{dp} member is the array of digits that forms the number. It must always be zero
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
244 padded. The \textbf{used} member is the count of digits used in the array. Although the
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
245 precision is fixed the algorithms are still tuned to not process the entire array if it
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
246 does not have to. The \textbf{sign} indicates the sign of the integer. It is \textbf{FP\_ZPOS} (0)
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
247 if the integer is zero or positive and \textbf{FP\_NEG} (1) otherwise.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
248
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
249 \section{Initialization}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
250 \subsection{Simple Initialization}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
251 To initialize an integer to the default state of zero use the fp\_init() function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
252
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
253 \index{fp\_init}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
254 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
255 void fp_init(fp_int *a);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
256 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
257
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
258 This will initialize the fp\_int $a$ to zero. Note that the function fp\_zero() is an alias
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
259 for fp\_init().
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
260
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
261 \subsection{Initialize Small Constants}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
262 To initialize an integer with a small single digit value use the fp\_set() function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
263
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
264 \index{fp\_set}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
265 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
266 void fp_set(fp_int *a, fp_digit b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
267 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
268
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
269 This will initialize $a$ and set it equal to the digit $b$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
270
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
271 \subsection{Initialize Copy}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
272 To initialize an integer with a copy of another integer use the fp\_init\_copy() function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
273
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
274 \index{fp\_init\_copy}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
275 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
276 void fp_init_copy(fp_int *a, fp_int *b)
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
277 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
278
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
279 This will initialize $a$ as a copy of $b$. Note that for compatibility with LibTomMath the function
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
280 fp\_copy() is also provided.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
281
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
282 \chapter{Arithmetic Operations}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
283 \section{Odds and Evens}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
284 To quickly and easily tell if an integer is zero, odd or even use the following functions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
285
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
286 \index{fp\_iszero} \index{fp\_iseven} \index{fp\_isodd}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
287 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
288 int fp_iszero(fp_int *a);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
289 int fp_iseven(fp_int *a);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
290 int fp_isodd(fp_int *a);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
291 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
292
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
293 These will return \textbf{FP\_YES} if the answer to their respective questions is yes. Otherwise they
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
294 return \textbf{FP\_NO}. Note that these are implemented as macros and as such you should avoid using
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
295 ++ or --~-- operators on the input operand.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
296
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
297 \section{Sign Manipulation}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
298 To negate or compute the absolute of an integer use the following functions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
299
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
300 \index{fp\_neg} \index{fp\_abs}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
301 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
302 void fp_neg(fp_int *a, fp_int *b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
303 void fp_abs(fp_int *a, fp_int *b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
304 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
305 This will compute the negation (or absolute) of $a$ and store the result in $b$. Note that these
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
306 are implemented as macros and as such you should avoid using ++ or --~-- operators on the input
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
307 operand.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
308
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
309 \section{Comparisons}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
310 To perform signed or unsigned comparisons use following functions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
311
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
312 \index{fp\_cmp} \index{fp\_cmp\_mag}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
313 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
314 int fp_cmp(fp_int *a, fp_int *b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
315 int fp_cmp_mag(fp_int *a, fp_int *b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
316 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
317 These will compare $a$ to $b$. They will return \textbf{FP\_GT} if $a$ is larger than $b$,
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
318 \textbf{FP\_EQ} if they are equal and \textbf{FP\_LT} if $a$ is less than $b$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
319
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
320 The function fp\_cmp performs signed comparisons while the other performs unsigned comparisons.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
321
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
322 \section{Shifting}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
323 To shift the digits of an fp\_int left or right use the following functions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
324
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
325 \index{fp\_lshd} \index{fp\_rshd}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
326 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
327 void fp_lshd(fp_int *a, int x);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
328 void fp_rshd(fp_int *a, int x);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
329 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
330
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
331 These will shift the digits of $a$ left (or right respectively) $x$ digits.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
332
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
333 To shift individual bits of an fp\_int use the following functions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
334
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
335 \index{fp\_div\_2d} \index{fp\_mod\_2d} \index{fp\_mul\_2d} \index{fp\_div\_2} \index{fp\_mul\_2}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
336 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
337 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
338 void fp_mod_2d(fp_int *a, int b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
339 void fp_mul_2d(fp_int *a, int b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
340 void fp_mul_2(fp_int *a, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
341 void fp_div_2(fp_int *a, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
342 void fp_2expt(fp_int *a, int b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
343 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
344 fp\_div\_2d() will divide $a$ by $2^b$ and store the quotient in $c$ and remainder in $d$. Either of
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
345 $c$ or $d$ can be \textbf{NULL} if their value is not required. fp\_mod\_2d() is a shortcut to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
346 compute the remainder directly. fp\_mul\_2d() will multiply $a$ by $2^b$ and store the result in $c$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
347
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
348 The fp\_mul\_2() and fp\_div\_2() functions are optimized multiplication and divisions by two. The
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
349 function fp\_2expt() will compute $a = 2^b$ quickly.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
350
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
351 To quickly count the number of least significant bits that are zero use the following function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
352
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
353 \index{fp\_cnt\_lsb}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
354 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
355 int fp_cnt_lsb(fp_int *a);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
356 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
357 This will return the number of adjacent least significant bits that are zero. This is equivalent
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
358 to the number of times two evenly divides $a$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
359
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
360 \section{Basic Algebra}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
361
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
362 The following functions round out the basic algebraic functionality of the library.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
363
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
364 \index{fp\_add} \index{fp\_sub} \index{fp\_mul} \index{fp\_sqr} \index{fp\_div} \index{fp\_mod}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
365 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
366 void fp_add(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
367 void fp_sub(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
368 void fp_mul(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
369 void fp_sqr(fp_int *a, fp_int *b);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
370 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
371 int fp_mod(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
372 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
373
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
374 The functions fp\_add(), fp\_sub() and fp\_mul() perform their respective operations on $a$ and
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
375 $b$ and store the result in $c$. The function fp\_sqr() computes $b = a^2$ and is faster than
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
376 using fp\_mul() to perform the same operation.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
377
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
378 The function fp\_div() divides $a$ by $b$ and stores the quotient in $c$ and remainder in $d$. Either
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
379 of $c$ and $d$ can be \textbf{NULL} if the result is not required. The function fp\_mod() is a simple
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
380 shortcut to find the remainder.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
381
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
382 \section{Modular Exponentiation}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
383 To compute a modular exponentiation use the following function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
384
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
385 \index{fp\_exptmod}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
386 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
387 int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
388 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
389 This computes $d \equiv a^b \mbox{ (mod }c\mbox{)}$ for any odd $c$ and $b$. $b$ may be negative so long as
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
390 $a^{-1} \mbox{ (mod }c\mbox{)}$ exists. The initial value of $a$ may be larger than $c$. The size of $c$ must be
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
391 half of the maximum precision used during the build of the library. For example, by default $c$ must be less
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
392 than $2^{2048}$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
393
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
394 \section{Number Theoretic}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
395
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
396 To perform modular inverses, greatest common divisor or least common multiples use the following
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
397 functions.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
398
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
399 \index{fp\_invmod} \index{fp\_gcd} \index{fp\_lcm}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
400 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
401 int fp_invmod(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
402 void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
403 void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
404 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
405
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
406 The fp\_invmod() function will find the modular inverse of $a$ modulo an odd modulus $b$ and store
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
407 it in $c$ (provided it exists). The function fp\_gcd() will compute the greatest common
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
408 divisor of $a$ and $b$ and store it in $c$. Similarly the fp\_lcm() function will compute
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
409 the least common multiple of $a$ and $b$ and store it in $c$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
410
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
411 \section{Prime Numbers}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
412 To quickly test a number for primality call this function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
413
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
414 \index{fp\_isprime}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
415 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
416 int fp_isprime(fp_int *a);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
417 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
418 This will return \textbf{FP\_YES} if $a$ is probably prime. It uses 256 trial divisions and
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
419 eight rounds of Rabin-Miller testing. Note that this routine performs modular exponentiations
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
420 which means that $a$ must be in a valid range of precision.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
421
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
422 \chapter{Porting TomsFastMath}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
423 \label{chap:asmops}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
424 \section{Getting Started}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
425 Porting TomsFastMath to a given processor target is usually a simple procedure. For the most part
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
426 assembly is used to get around the lack of a ``add with carry'' operation in the C language. To
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
427 make matters simpler the use of assembler is through macro blocks.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
428
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
429 Each ``port'' is defined by a block of code that re-defines the portable ISO C macros with assembler
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
430 inline blocks. To add a new port you must designate a TFM\_XXX define that will enable your
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
431 port when built.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
432
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
433 \section{Multiply with Comba}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
434 The file ``fp\_mul\_comba.c'' is responsible for providing the fast multiplication within the
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
435 library. This comba multiplication is fairly simple. It uses a sliding three digit carry
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
436 system with the variables $c0$, $c1$, $c2$. For every digit of output $c0$ is the what will
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
437 be that digit, $c1$ will carry into the next digit and $c2$ will be the ``c1'' carry for
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
438 the next digit. For every ``next'' digit effectively $c0$ is stored as output, $c1$ moves into
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
439 $c0$, $c2$ into $c1$ and zero into $c2$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
440
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
441 The following macros define the assmebler interface to the code.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
442
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
443 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
444 #define COMBA_START
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
445 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
446
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
447 This is issued at the beginning of the multiplication function. This is in place to allow you to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
448 initialize any registers or machine words required. You can leave it blank if you do not need
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
449 it.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
450
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
451 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
452 #define COMBA_CLEAR \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
453 c0 = c1 = c2 = 0;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
454 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
455
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
456 This clears the three comba carries. If you are going to place carries in registers then
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
457 zero the appropriate registers. Note that the functions do not use $c0$, $c1$ or $c2$ directly
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
458 so you are free to ignore these varibles and use registers directly.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
459
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
460 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
461 #define COMBA_FORWARD \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
462 c0 = c1; c1 = c2; c2 = 0;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
463 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
464
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
465 This propagates the carries after a digit has been produced.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
466
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
467 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
468 #define COMBA_STORE(x) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
469 x = c0;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
470 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
471
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
472 This stores the $c0$ digit in the memory location specified by $x$. Note that if you manually
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
473 aliased $c0$ with a register than just store that register in $x$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
474
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
475 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
476 #define COMBA_STORE2(x) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
477 x = c1;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
478 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
479
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
480 This stores the $c1$ digit in the memory location specified by $x$. Note that if you manually
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
481 aliased $c1$ with a register than just store that register in $x$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
482
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
483 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
484 #define COMBA_FINI
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
485 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
486
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
487 If at the end of the function you need to perform some action fill this macro in.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
488
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
489 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
490 #define MULADD(i, j) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
491 t = ((fp_word)i) * ((fp_word)j); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
492 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
493 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
494 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
495
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
496 This macro performs the ``multiply and add'' step that is central to the comba
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
497 multiplier. It multiplies the fp\_digits $i$ and $j$ to produce a fp\_word result. Effectively
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
498 the double--digit value is added to the three-digit carry formed by $c0$, $c1$, $c2$ where $c0$
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
499 is the least significant digit.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
500
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
501 \section{Squaring with Comba}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
502 Squaring is similar to multiplication except that it uses a special ``multiply and add twice'' macro
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
503 that replaces multiplications that are not required.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
504
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
505 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
506 #define COMBA_START
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
507 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
508
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
509 This allows for any initialization code you might have.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
510
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
511 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
512 #define CLEAR_CARRY \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
513 c0 = c1 = c2 = 0;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
514 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
515
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
516 This will clear the carries. Like multiplication you can safely alias the three carry variables
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
517 to registers if you can/want to.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
518
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
519 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
520 #define COMBA_STORE(x) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
521 x = c0;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
522 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
523
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
524 Store the $c0$ carry to a given memory location.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
525
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
526 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
527 #define COMBA_STORE2(x) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
528 x = c1;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
529 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
530
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
531 Store the $c1$ carry to a given memory location.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
532
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
533 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
534 #define CARRY_FORWARD \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
535 c0 = c1; c1 = c2; c2 = 0;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
536 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
537
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
538 Forward propagate all three carry variables.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
539
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
540 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
541 #define COMBA_FINI
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
542 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
543
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
544 If you need to clean up at the end of the function.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
545
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
546 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
547 /* multiplies point i and j, updates carry "c1" and digit c2 */
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
548 #define SQRADD(i, j) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
549 t = ((fp_word)i) * ((fp_word)j); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
550 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
551 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
552 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
553
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
554 This is essentially the MULADD macro from the multiplication code.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
555
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
556 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
557 /* for squaring some of the terms are doubled... */
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
558 #define SQRADD2(i, j) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
559 t = ((fp_word)i) * ((fp_word)j); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
560 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
561 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
562 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
563 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
564 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
565
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
566 This is like SQRADD except it adds the produce twice. It's similar to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
567 computing SQRADD(i, j*2).
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
568
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
569 To further make things interesting the squaring code also has ``doubles'' (see my LTM book chapter five...) which are
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
570 handled with these macros.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
571
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
572 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
573 #define SQRADDSC(i, j) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
574 do { fp_word t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
575 t = ((fp_word)i) * ((fp_word)j); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
576 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
577 } while (0);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
578 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
579 This computes a product and stores it in the ``secondary'' carry registers $\left < sc0, sc1, sc2 \right >$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
580
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
581 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
582 #define SQRADDAC(i, j) \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
583 do { fp_word t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
584 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
585 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
586 } while (0);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
587 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
588 This computes a product and adds it to the ``secondary'' carry registers.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
589
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
590 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
591 #define SQRADDDB \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
592 do { fp_word t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
593 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
594 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
595 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
596 } while (0);
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
597 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
598 This doubles the ``secondary'' carry registers and adds the sum to the main carry registers. Really complicated.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
599
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
600 \section{Montgomery with Comba}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
601 Montgomery reduction is used in modular exponentiation and is most called function during
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
602 that operation. It's important to make sure this routine is very fast or all is lost.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
603
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
604 Unlike the two other comba routines this one does not use a single three--digit carry
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
605 system. It does have three--digit carries except that the routine steps through them
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
606 in the inner loop. This means you cannot alias them to registers (at all).
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
607
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
608 To make matters simple though the three arrays of carries are stored in one array. The
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
609 ``c0'' array resides in $c[0 \ldots OFF1-1]$, ``c1'' in $c[OFF1 \ldots OFF2-1]$ and ``c2'' in
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
610 $c[OFF2 \ldots OFF2+FP\_SIZE-1]$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
611
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
612 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
613 #define MONT_START
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
614 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
615
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
616 This allows you to insert anything at the start that you need.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
617
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
618 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
619 #define MONT_FINI
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
620 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
621
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
622 This allows you to insert anything at the end that you need.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
623
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
624 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
625 #define LOOP_START \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
626 mu = c[x] * mp;
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
627 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
628
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
629 This computes the $\mu$ value for the inner loop. You can safely alias $mu$ and $mp$ to
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
630 a register if you want.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
631
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
632 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
633 #define INNERMUL \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
634 do { fp_word t; \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
635 _c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
636 (((fp_word)mu) * ((fp_word)*tmpm++)); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
637 cy = (t >> DIGIT_BIT); \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
638 } while (0)
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
639 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
640
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
641 This computes the inner product and adds it to the destination and carry variable $cy$.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
642 This uses the $mu$ value computed above (can be in a register already) and the
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
643 $cy$ which is a chaining carry. Inside the INNERMUL loop the $cy$ value can be kept
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
644 inside a register (hint: it always starts as $cy = 0$ in the first iteration).
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
645
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
646 Upon completion of the inner loop the macro LOOP\_END is called which is used to fetch
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
647 $cy$ into the variable the C program can see. This is where, if you cached $cy$ in a
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
648 register you would copy it to the locally accessible C variable.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
649
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
650 \begin{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
651 #define PROPCARRY \
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
652 do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
653 \end{verbatim}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
654
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
655 This propagates the carry upwards by one digit.
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
656
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
657 \input{tfm.ind}
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
658
a362b62d38b2 Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
659 \end{document}