Mercurial > dropbear
annotate tomsfastmath/tfm.tex @ 646:f10335e5e42f dropbear-tfm
- More asm constraint fixes. Now seems to build OK on 32-bit OS X.
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Wed, 30 Nov 2011 23:03:47 +0800 |
parents | a362b62d38b2 |
children |
rev | line source |
---|---|
643
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
1 \documentclass[b5paper]{book} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
2 \usepackage{hyperref} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
3 \usepackage{makeidx} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
4 \usepackage{amssymb} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
5 \usepackage{color} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
6 \usepackage{alltt} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
7 \usepackage{graphicx} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
8 \usepackage{layout} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
9 \def\union{\cup} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
10 \def\intersect{\cap} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
11 \def\getsrandom{\stackrel{\rm R}{\gets}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
12 \def\cross{\times} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
13 \def\cat{\hspace{0.5em} \| \hspace{0.5em}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
14 \def\catn{$\|$} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
15 \def\divides{\hspace{0.3em} | \hspace{0.3em}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
16 \def\nequiv{\not\equiv} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
17 \def\approx{\raisebox{0.2ex}{\mbox{\small $\sim$}}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
18 \def\lcm{{\rm lcm}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
19 \def\gcd{{\rm gcd}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
20 \def\log{{\rm log}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
21 \def\ord{{\rm ord}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
22 \def\abs{{\mathit abs}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
23 \def\rep{{\mathit rep}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
24 \def\mod{{\mathit\ mod\ }} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
25 \renewcommand{\pmod}[1]{\ ({\rm mod\ }{#1})} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
26 \newcommand{\floor}[1]{\left\lfloor{#1}\right\rfloor} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
27 \newcommand{\ceil}[1]{\left\lceil{#1}\right\rceil} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
28 \def\Or{{\rm\ or\ }} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
29 \def\And{{\rm\ and\ }} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
30 \def\iff{\hspace{1em}\Longleftrightarrow\hspace{1em}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
31 \def\implies{\Rightarrow} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
32 \def\undefined{{\rm ``undefined"}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
33 \def\Proof{\vspace{1ex}\noindent {\bf Proof:}\hspace{1em}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
34 \let\oldphi\phi |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
35 \def\phi{\varphi} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
36 \def\Pr{{\rm Pr}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
37 \newcommand{\str}[1]{{\mathbf{#1}}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
38 \def\F{{\mathbb F}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
39 \def\N{{\mathbb N}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
40 \def\Z{{\mathbb Z}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
41 \def\R{{\mathbb R}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
42 \def\C{{\mathbb C}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
43 \def\Q{{\mathbb Q}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
44 \definecolor{DGray}{gray}{0.5} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
45 \newcommand{\emailaddr}[1]{\mbox{$<${#1}$>$}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
46 \def\twiddle{\raisebox{0.3ex}{\mbox{\tiny $\sim$}}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
47 \def\gap{\vspace{0.5ex}} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
48 \makeindex |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
49 \begin{document} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
50 \frontmatter |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
51 \pagestyle{empty} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
52 \title{TomsFastMath User Manual \\ v0.12} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
53 \author{Tom St Denis \\ [email protected]} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
54 \maketitle |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
55 This text and library are all hereby placed in the public domain. This book has been formatted for B5 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
56 [176x250] paper using the \LaTeX{} {\em book} macro package. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
57 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
58 \vspace{13cm} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
59 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
60 \begin{flushleft}This project was sponsored in part by |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
61 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
62 Secure Science Corporation \url{http://www.securescience.net}. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
63 \end{flushleft} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
64 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
65 \tableofcontents |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
66 \listoffigures |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
67 \mainmatter |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
68 \pagestyle{headings} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
69 \chapter{Introduction} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
70 \section{What is TomsFastMath?} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
71 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
72 TomsFastMath is meant to be a very fast yet still fairly portable and easy to port large |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
73 integer arithmetic library written in ISO C. The goal specifically is to be able to perform |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
74 very fast modular exponentiations and other related functions required for ECC, DH and RSA |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
75 cryptosystems. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
76 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
77 Most of the library is pure ISO C portable source code while a small portion (three files) contain |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
78 a mixture of ISO C and assembler inline fragments. Compared to LibTomMath this new library is |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
79 meant to be much faster while sacrificing flexibility. This is accomplished through several means. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
80 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
81 \begin{enumerate} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
82 \item The new code is slightly messier and contains asm blocks. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
83 \item This uses fixed not multiple precision integers. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
84 \item It is designed only for fast modular exponentiations [e.g. less flexibility]. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
85 \end{enumerate} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
86 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
87 To mitigate some of the problems that arise from using assembler it has been carefully and |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
88 appropriately used where it would make the most gain in performance. Also we use macros |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
89 for assembler code which allows new ports to be inserted easily. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
90 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
91 The new code uses fixed precision arithmetic which means at compile time you choose a maximum |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
92 precision and all numbers are limited to that. This has the benefit of not requiring any |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
93 memory heap operations (which are slow) in any of the functions. It has the downside that |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
94 integers that are too large are truncated. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
95 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
96 The goal of this library is to be able to perform modular exponentiations (with an odd modulus) very |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
97 fast. This is what takes the most time in systems such as RSA and DH. This also requires |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
98 fast multiplication and squaring and has the side effect of speeding up ECC operations as well. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
99 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
100 \section{License} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
101 TomsFastMath is public domain. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
102 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
103 \section{Building} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
104 To build the library simply type ``make''. Or to install in typical *unix like directories use |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
105 ``make install''. Similarly a shared library can be built with ``make -f makefile.shared install''. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
106 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
107 You can build the test program with ``make test''. To perform simple static testing (useful to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
108 test out new assembly ports) use the stest program. Type ``make stest'' and run it on your |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
109 target. The program will perform three multiplications, squarings and Montgomery reductions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
110 Likely if your assembly code is invalid this code will exhibit the bug. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
111 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
112 \subsection{Intel CC} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
113 In theory you should be able to build the library with |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
114 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
115 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
116 CFLAGS="-O3 -ip" CC=icc make IGNORE_SPEED=1 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
117 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
118 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
119 However, Intel's inline assembler is way less advanced than GCC's. As a result it doesn't compile. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
120 Fortunately it doesn't really matter. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
121 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
122 \subsection{MSVC} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
123 The library doesn't build with MSVC. Imagine that. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
124 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
125 \subsection{Build Limitations} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
126 TomsFastMath has the following build requirements which are non--portable but under most |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
127 circumstances not problematic. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
128 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
129 \begin{enumerate} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
130 \item ``CHAR\_BIT'' must be eight. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
131 \item The ``fp\_digit'' type must be a multiple of eight bits long. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
132 \item The ``fp\_word'' must be at least twice the length of fp\_digit. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
133 \end{enumerate} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
134 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
135 \subsection{Optimization Configuration} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
136 By default TFM is configured for 32--bit digits using ISO C source code. This mode while portable |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
137 is not very efficient. While building the library (from scratch) you can define one of |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
138 several ``CFLAGS'' defines. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
139 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
140 For example, to build with SSE2 optimizations type |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
141 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
142 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
143 CFLAGS=-DTFM_SSE2 make clean libtfm.a |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
144 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
145 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
146 \subsubsection{x86--32} The ``x86--32'' mode is defined by ``TFM\_X86'' and covers all |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
147 i386 and beyond processors. It requires GCC to build and only works with 32--bit digits. In this |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
148 mode fp\_digit is 32--bits and fp\_word is 64--bits. This mode will be autodetected when building |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
149 with GCC to an ``i386'' target. You can override this behaviour by defining TFM\_NO\_ASM or |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
150 another optimization mode (such as SSE2). |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
151 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
152 \subsubsection{SSE2} The ``SSE2'' mode is defined by ``TFM\_SSE2'' and requires a Pentium 4, Pentium |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
153 M or Athlon64 processor. It requires GCC to build. Note that you shouldn't define both |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
154 TFM\_X86 and TFM\_SSE2 at the same time. This mode only works with 32--bit digits. In this |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
155 mode fp\_digit is 32--bits and fp\_word is 64--bits. While this mode will work on the AMD Athlon64 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
156 series of processors it is less efficient than the native ``x86--64'' mode and not recommended. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
157 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
158 There is an additional ``TFM\_PRESCOTT'' flag that you can define for P4 Prescott processors. This causes |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
159 the mul/sqr functions to use x86\_32 and the Montgomery reduction to use SSE2 which is (so far) the fastest |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
160 combination. If you are using an older (e.g. Northwood) generation P4 don't define this. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
161 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
162 \subsubsection{x86--64} The ``x86--64'' mode is defined by ``TFM\_X86\_64'' and requires a |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
163 ``x86--64'' capable processor (Athlon64 and future Pentium processors). It requires GCC to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
164 build and only works with 64--bit digits. Note that by enabling this mode it will automatically |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
165 enable 64--bit digits. In this mode fp\_digit is 64--bits and fp\_word is 128--bits. This mode will |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
166 be autodetected when building with GCC to an ``x86--64'' target. You can override this behaviour by defining |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
167 TFM\_NO\_ASM. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
168 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
169 \subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires an ARMv4 with the M instructions (enhanced |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
170 multipliers) or higher processor. It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
171 fp\_word is 64--bits. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
172 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
173 \subsubsection{PPC32} The ``PPC32'' mode is defined by ``TFM\_PPC32'' and requires a standard PPC processor. It doesn't |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
174 use altivec or other extensions so it should work on all compliant implementations of PPC. It requires GCC and works |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
175 with 32--bit digits. In this mode fp\_digit is 32--bits and fp\_word is 64--bits. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
176 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
177 \subsubsection{PPC64} The ``PPC64'' mode is defined by ``TFM\_PPC64'' and requires a 64--bit PPC processor. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
178 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
179 \subsubsection{AVR32} The ``AVR32'' mode is defined by ``TFM\_AVR32'' and requires an Atmel AVR32 processor. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
180 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
181 \subsubsection{Future Releases} Future releases will support additional platform optimizations. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
182 Developers of MIPS and SPARC platforms are encouraged to submit GCC asm inline patches |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
183 (see chapter \ref{chap:asmops} for more information). |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
184 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
185 \begin{figure}[htbp] |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
186 \begin{small} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
187 \begin{center} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
188 \begin{tabular}{|l|l|} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
189 \hline \textbf{Processor} & \textbf{Recommended Mode} \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
190 \hline All 32--bit x86 platforms & TFM\_X86 \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
191 \hline Pentium 4 & TFM\_SSE2 \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
192 \hline Pentium 4 Prescott & TFM\_SSE2 + TFM\_PRESCOTT \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
193 \hline Athlon64 & TFM\_X86\_64 \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
194 \hline ARMv4 or higher with M & TFM\_ARM \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
195 \hline G3/G4 (32-bit PPC) & TFM\_PPC32 \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
196 \hline G5 (64-bit PPC) & TFM\_PPC64 \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
197 \hline Atmel AVR32 & TFM\_AVR32 \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
198 \hline &\\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
199 \hline x86--32 or x86--64 (with GCC) & Leave blank and let autodetect work \\ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
200 \hline |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
201 \end{tabular} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
202 \caption{Recommended Build Modes} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
203 \end{center} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
204 \end{small} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
205 \end{figure} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
206 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
207 \subsection{Build Configurations} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
208 TomsFastMath is configurable in terms of which unrolled code (if any) is included. By default, the majority of the code is included which |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
209 results in large binaries. The first flag to try out is TFM\_ALREADY\_SET which tells TFM to turn off \textbf{all} unrolled code. This will |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
210 result in a smaller library but also a much slower library. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
211 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
212 From this clean state, you can start enabling unrolled code for given cryptographic tasks at hand. A series of TFM\_MULXYZ and TFM\_SQRXYZ macros |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
213 exist to enable specific unrolled code. For instance, TFM\_MUL32 will enable a 32 digit unrolled multiplier. For a complete list see the tfm.h header |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
214 file. Keep in mind this is for digits, not bits. For example, you should enable TFM\_MUL16 if you are doing 1024--bit exptmods on a 64--bit platform, or enable |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
215 TFM\_MUL32 on 32--bit platforms. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
216 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
217 To help developers use ECC there is a set of defines for the five NIST curve sizes. They are named TFM\_ECCXYZ where XYZ is one of 192, 224, 256, 384, or 521. These |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
218 enable the multipliers and squaring code for a given curve, autodetecting 64--bit platforms as well. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
219 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
220 \subsection{Precision Configuration} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
221 The precision of all integers in this library is fixed to a limited precision. Essentially |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
222 the rule of setting the precision is if you plan on doing modular exponentiation with $k$--bit |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
223 numbers then the precision must be fixed to $2k$--bits plus four digits. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
224 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
225 This is changed by altering the value of ``FP\_MAX\_SIZE'' in tfm.h to your desired size. By default, |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
226 the library is configured to handle up to 2048--bit inputs to the modular exponentiator. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
227 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
228 \chapter{Getting Started} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
229 \section{Data Types} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
230 TomsFastMath is a large fixed precision integer library. It provides the functionality to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
231 manipulate large signed integers through a relatively trivial API and a single data type. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
232 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
233 The ``fp\_int'' or fixed precision integer is the data type that the functions operate with. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
234 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
235 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
236 typedef struct { |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
237 fp_digit dp[FP_SIZE]; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
238 int used, |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
239 sign; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
240 } fp_int; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
241 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
242 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
243 The \textbf{dp} member is the array of digits that forms the number. It must always be zero |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
244 padded. The \textbf{used} member is the count of digits used in the array. Although the |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
245 precision is fixed the algorithms are still tuned to not process the entire array if it |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
246 does not have to. The \textbf{sign} indicates the sign of the integer. It is \textbf{FP\_ZPOS} (0) |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
247 if the integer is zero or positive and \textbf{FP\_NEG} (1) otherwise. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
248 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
249 \section{Initialization} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
250 \subsection{Simple Initialization} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
251 To initialize an integer to the default state of zero use the fp\_init() function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
252 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
253 \index{fp\_init} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
254 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
255 void fp_init(fp_int *a); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
256 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
257 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
258 This will initialize the fp\_int $a$ to zero. Note that the function fp\_zero() is an alias |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
259 for fp\_init(). |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
260 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
261 \subsection{Initialize Small Constants} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
262 To initialize an integer with a small single digit value use the fp\_set() function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
263 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
264 \index{fp\_set} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
265 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
266 void fp_set(fp_int *a, fp_digit b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
267 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
268 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
269 This will initialize $a$ and set it equal to the digit $b$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
270 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
271 \subsection{Initialize Copy} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
272 To initialize an integer with a copy of another integer use the fp\_init\_copy() function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
273 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
274 \index{fp\_init\_copy} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
275 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
276 void fp_init_copy(fp_int *a, fp_int *b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
277 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
278 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
279 This will initialize $a$ as a copy of $b$. Note that for compatibility with LibTomMath the function |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
280 fp\_copy() is also provided. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
281 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
282 \chapter{Arithmetic Operations} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
283 \section{Odds and Evens} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
284 To quickly and easily tell if an integer is zero, odd or even use the following functions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
285 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
286 \index{fp\_iszero} \index{fp\_iseven} \index{fp\_isodd} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
287 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
288 int fp_iszero(fp_int *a); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
289 int fp_iseven(fp_int *a); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
290 int fp_isodd(fp_int *a); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
291 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
292 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
293 These will return \textbf{FP\_YES} if the answer to their respective questions is yes. Otherwise they |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
294 return \textbf{FP\_NO}. Note that these are implemented as macros and as such you should avoid using |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
295 ++ or --~-- operators on the input operand. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
296 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
297 \section{Sign Manipulation} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
298 To negate or compute the absolute value of an integer use the following functions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
299 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
300 \index{fp\_neg} \index{fp\_abs} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
301 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
302 void fp_neg(fp_int *a, fp_int *b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
303 void fp_abs(fp_int *a, fp_int *b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
304 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
305 This will compute the negation (or absolute value) of $a$ and store the result in $b$. Note that these |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
306 are implemented as macros and as such you should avoid using ++ or --~-- operators on the input |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
307 operand. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
308 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
309 \section{Comparisons} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
310 To perform signed or unsigned comparisons use the following functions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
311 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
312 \index{fp\_cmp} \index{fp\_cmp\_mag} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
313 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
314 int fp_cmp(fp_int *a, fp_int *b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
315 int fp_cmp_mag(fp_int *a, fp_int *b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
316 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
317 These will compare $a$ to $b$. They will return \textbf{FP\_GT} if $a$ is larger than $b$, |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
318 \textbf{FP\_EQ} if they are equal and \textbf{FP\_LT} if $a$ is less than $b$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
319 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
320 The function fp\_cmp performs signed comparisons while the other performs unsigned comparisons. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
321 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
322 \section{Shifting} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
323 To shift the digits of an fp\_int left or right use the following functions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
324 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
325 \index{fp\_lshd} \index{fp\_rshd} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
326 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
327 void fp_lshd(fp_int *a, int x); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
328 void fp_rshd(fp_int *a, int x); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
329 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
330 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
331 These will shift the digits of $a$ left (or right respectively) $x$ digits. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
332 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
333 To shift individual bits of an fp\_int use the following functions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
334 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
335 \index{fp\_div\_2d} \index{fp\_mod\_2d} \index{fp\_mul\_2d} \index{fp\_div\_2} \index{fp\_mul\_2} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
336 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
337 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
338 void fp_mod_2d(fp_int *a, int b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
339 void fp_mul_2d(fp_int *a, int b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
340 void fp_mul_2(fp_int *a, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
341 void fp_div_2(fp_int *a, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
342 void fp_2expt(fp_int *a, int b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
343 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
344 fp\_div\_2d() will divide $a$ by $2^b$ and store the quotient in $c$ and remainder in $d$. Either of |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
345 $c$ or $d$ can be \textbf{NULL} if their value is not required. fp\_mod\_2d() is a shortcut to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
346 compute the remainder directly. fp\_mul\_2d() will multiply $a$ by $2^b$ and store the result in $c$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
347 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
348 The fp\_mul\_2() and fp\_div\_2() functions are optimized multiplications and divisions by two. The |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
349 function fp\_2expt() will compute $a = 2^b$ quickly. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
350 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
351 To quickly count the number of least significant bits that are zero use the following function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
352 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
353 \index{fp\_cnt\_lsb} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
354 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
355 int fp_cnt_lsb(fp_int *a); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
356 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
357 This will return the number of adjacent least significant bits that are zero. This is equivalent |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
358 to the number of times two evenly divides $a$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
359 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
360 \section{Basic Algebra} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
361 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
362 The following functions round out the basic algebraic functionality of the library. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
363 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
364 \index{fp\_add} \index{fp\_sub} \index{fp\_mul} \index{fp\_sqr} \index{fp\_div} \index{fp\_mod} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
365 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
366 void fp_add(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
367 void fp_sub(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
368 void fp_mul(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
369 void fp_sqr(fp_int *a, fp_int *b); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
370 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
371 int fp_mod(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
372 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
373 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
374 The functions fp\_add(), fp\_sub() and fp\_mul() perform their respective operations on $a$ and |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
375 $b$ and store the result in $c$. The function fp\_sqr() computes $b = a^2$ and is faster than |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
376 using fp\_mul() to perform the same operation. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
377 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
378 The function fp\_div() divides $a$ by $b$ and stores the quotient in $c$ and remainder in $d$. Either |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
379 of $c$ and $d$ can be \textbf{NULL} if the result is not required. The function fp\_mod() is a simple |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
380 shortcut to find the remainder. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
381 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
382 \section{Modular Exponentiation} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
383 To compute a modular exponentiation use the following function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
384 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
385 \index{fp\_exptmod} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
386 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
387 int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
388 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
389 This computes $d \equiv a^b \mbox{ (mod }c\mbox{)}$ for any odd modulus $c$ and any exponent $b$. $b$ may be negative so long as |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
390 $a^{-1} \mbox{ (mod }c\mbox{)}$ exists. The initial value of $a$ may be larger than $c$. The size of $c$ must be at most |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
391 half of the maximum precision used during the build of the library. For example, by default $c$ must be less |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
392 than $2^{2048}$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
393 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
394 \section{Number Theoretic} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
395 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
396 To perform modular inverses, greatest common divisor or least common multiples use the following |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
397 functions. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
398 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
399 \index{fp\_invmod} \index{fp\_gcd} \index{fp\_lcm} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
400 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
401 int fp_invmod(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
402 void fp_gcd(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
403 void fp_lcm(fp_int *a, fp_int *b, fp_int *c); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
404 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
405 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
406 The fp\_invmod() function will find the modular inverse of $a$ modulo an odd modulus $b$ and store |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
407 it in $c$ (provided it exists). The function fp\_gcd() will compute the greatest common |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
408 divisor of $a$ and $b$ and store it in $c$. Similarly the fp\_lcm() function will compute |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
409 the least common multiple of $a$ and $b$ and store it in $c$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
410 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
411 \section{Prime Numbers} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
412 To quickly test a number for primality call this function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
413 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
414 \index{fp\_isprime} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
415 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
416 int fp_isprime(fp_int *a); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
417 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
418 This will return \textbf{FP\_YES} if $a$ is probably prime. It uses 256 trial divisions and |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
419 eight rounds of Rabin-Miller testing. Note that this routine performs modular exponentiations |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
420 which means that $a$ must be in a valid range of precision. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
421 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
422 \chapter{Porting TomsFastMath} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
423 \label{chap:asmops} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
424 \section{Getting Started} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
425 Porting TomsFastMath to a given processor target is usually a simple procedure. For the most part |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
426 assembly is used to get around the lack of an ``add with carry'' operation in the C language. To |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
427 make matters simpler the use of assembler is through macro blocks. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
428 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
429 Each ``port'' is defined by a block of code that re-defines the portable ISO C macros with assembler |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
430 inline blocks. To add a new port you must designate a TFM\_XXX define that will enable your |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
431 port when built. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
432 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
433 \section{Multiply with Comba} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
434 The file ``fp\_mul\_comba.c'' is responsible for providing the fast multiplication within the |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
435 library. This comba multiplication is fairly simple. It uses a sliding three digit carry |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
436 system with the variables $c0$, $c1$, $c2$. For every digit of output $c0$ is what will |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
437 be that digit, $c1$ will carry into the next digit and $c2$ will be the ``c1'' carry for |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
438 the next digit. For every ``next'' digit effectively $c0$ is stored as output, $c1$ moves into |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
439 $c0$, $c2$ into $c1$ and zero into $c2$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
440 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
441 The following macros define the assembler interface to the code. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
442 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
443 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
444 #define COMBA_START |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
445 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
446 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
447 This is issued at the beginning of the multiplication function. This is in place to allow you to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
448 initialize any registers or machine words required. You can leave it blank if you do not need |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
449 it. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
450 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
451 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
452 #define COMBA_CLEAR \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
453 c0 = c1 = c2 = 0; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
454 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
455 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
456 This clears the three comba carries. If you are going to place carries in registers then |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
457 zero the appropriate registers. Note that the functions do not use $c0$, $c1$ or $c2$ directly |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
458 so you are free to ignore these variables and use registers directly. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
459 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
460 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
461 #define COMBA_FORWARD \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
462 c0 = c1; c1 = c2; c2 = 0; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
463 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
464 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
465 This propagates the carries after a digit has been produced. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
466 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
467 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
468 #define COMBA_STORE(x) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
469 x = c0; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
470 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
471 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
472 This stores the $c0$ digit in the memory location specified by $x$. Note that if you manually |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
473 aliased $c0$ with a register then just store that register in $x$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
474 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
475 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
476 #define COMBA_STORE2(x) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
477 x = c1; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
478 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
479 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
480 This stores the $c1$ digit in the memory location specified by $x$. Note that if you manually |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
481 aliased $c1$ with a register then just store that register in $x$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
482 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
483 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
484 #define COMBA_FINI |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
485 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
486 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
487 If at the end of the function you need to perform some action fill this macro in. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
488 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
489 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
490 #define MULADD(i, j) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
491 t = ((fp_word)i) * ((fp_word)j); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
492 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
493 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
494 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
495 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
496 This macro performs the ``multiply and add'' step that is central to the comba |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
497 multiplier. It multiplies the fp\_digits $i$ and $j$ to produce a fp\_word result. Effectively |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
498 the double-digit value is added to the three-digit carry formed by $c0$, $c1$, $c2$ where $c0$ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
499 is the least significant digit. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
500 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
501 \section{Squaring with Comba} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
502 Squaring is similar to multiplication except that it uses a special ``multiply and add twice'' macro |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
503 that replaces multiplications that are not required. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
504 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
505 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
506 #define COMBA_START |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
507 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
508 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
509 This allows for any initialization code you might have. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
510 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
511 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
512 #define CLEAR_CARRY \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
513 c0 = c1 = c2 = 0; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
514 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
515 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
516 This will clear the carries. Like multiplication you can safely alias the three carry variables |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
517 to registers if you can/want to. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
518 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
519 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
520 #define COMBA_STORE(x) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
521 x = c0; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
522 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
523 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
524 Store the $c0$ carry to a given memory location. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
525 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
526 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
527 #define COMBA_STORE2(x) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
528 x = c1; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
529 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
530 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
531 Store the $c1$ carry to a given memory location. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
532 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
533 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
534 #define CARRY_FORWARD \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
535 c0 = c1; c1 = c2; c2 = 0; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
536 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
537 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
538 Forward propagate all three carry variables. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
539 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
540 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
541 #define COMBA_FINI |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
542 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
543 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
544 If you need to clean up at the end of the function. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
545 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
546 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
547 /* multiplies point i and j, updates carry "c1" and digit c2 */ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
548 #define SQRADD(i, j) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
549 t = ((fp_word)i) * ((fp_word)j); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
550 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
551 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
552 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
553 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
554 This is essentially the MULADD macro from the multiplication code. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
555 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
556 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
557 /* for squaring some of the terms are doubled... */ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
558 #define SQRADD2(i, j) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
559 t = ((fp_word)i) * ((fp_word)j); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
560 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
561 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
562 c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
563 c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
564 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
565 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
566 This is like SQRADD except it adds the product twice. It's similar to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
567 computing SQRADD(i, j*2). |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
568 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
569 To further make things interesting the squaring code also has ``doubles'' (see my LTM book chapter five...) which are |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
570 handled with these macros. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
571 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
572 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
573 #define SQRADDSC(i, j) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
574 do { fp_word t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
575 t = ((fp_word)i) * ((fp_word)j); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
576 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
577 } while (0); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
578 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
579 This computes a product and stores it in the ``secondary'' carry registers $\left < sc0, sc1, sc2 \right >$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
580 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
581 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
582 #define SQRADDAC(i, j) \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
583 do { fp_word t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
584 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
585 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
586 } while (0); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
587 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
588 This computes a product and adds it to the ``secondary'' carry registers. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
589 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
590 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
591 #define SQRADDDB \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
592 do { fp_word t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
593 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
594 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
595 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
596 } while (0); |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
597 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
598 This doubles the ``secondary'' carry registers and adds the sum to the main carry registers. Really complicated. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
599 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
600 \section{Montgomery with Comba} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
601 Montgomery reduction is used in modular exponentiation and is the most called function during |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
602 that operation. It's important to make sure this routine is very fast or all is lost. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
603 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
604 Unlike the two other comba routines this one does not use a single three-digit carry |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
605 system. It does have three-digit carries except that the routine steps through them |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
606 in the inner loop. This means you cannot alias them to registers (at all). |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
607 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
608 To make matters simple though the three arrays of carries are stored in one array. The |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
609 ``c0'' array resides in $c[0 \ldots OFF1-1]$, ``c1'' in $c[OFF1 \ldots OFF2-1]$ and ``c2'' in |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
610 $c[OFF2 \ldots OFF2+FP\_SIZE-1]$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
611 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
612 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
613 #define MONT_START |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
614 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
615 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
616 This allows you to insert anything at the start that you need. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
617 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
618 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
619 #define MONT_FINI |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
620 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
621 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
622 This allows you to insert anything at the end that you need. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
623 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
624 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
625 #define LOOP_START \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
626 mu = c[x] * mp; |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
627 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
628 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
629 This computes the $\mu$ value for the inner loop. You can safely alias $mu$ and $mp$ to |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
630 a register if you want. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
631 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
632 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
633 #define INNERMUL \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
634 do { fp_word t; \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
635 _c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
636 (((fp_word)mu) * ((fp_word)*tmpm++)); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
637 cy = (t >> DIGIT_BIT); \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
638 } while (0) |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
639 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
640 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
641 This computes the inner product and adds it to the destination and carry variable $cy$. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
642 This uses the $mu$ value computed above (can be in a register already) and the |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
643 $cy$ which is a chaining carry. Inside the INNERMUL loop the $cy$ value can be kept |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
644 inside a register (hint: it always starts as $cy = 0$ in the first iteration). |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
645 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
646 Upon completion of the inner loop the macro LOOP\_END is called which is used to fetch |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
647 $cy$ into the variable the C program can see. This is where, if you cached $cy$ in a |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
648 register, you would copy it to the locally accessible C variable. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
649 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
650 \begin{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
651 #define PROPCARRY \ |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
652 do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0) |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
653 \end{verbatim} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
654 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
655 This propagates the carry upwards by one digit. |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
656 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
657 \input{tfm.ind} |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
658 |
a362b62d38b2
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
Matt Johnston <matt@ucc.asn.au>
parents:
diff
changeset
|
659 \end{document} |