Speed up crypto derivations with vartime ops

Implemented ge_scalarmult_vartime (a*B) and ge_scalarmult_base_vartime (a*G). These come from libsodium code as the rest of crypto-ops.c
This commit is contained in:
WeebDataHoarder
2025-09-29 13:19:51 +02:00
committed by SChernykh
parent 7cb6a0dcc3
commit 22cabb5ac9
3 changed files with 113 additions and 5 deletions

View File

@@ -1628,6 +1628,32 @@ static void select(ge_precomp *t, int pos, signed char b) {
ge_precomp_cmov(t, &minust, bnegative);
}
// With these select_vartime/ge_scalarmult_base_vartime I got ~25% speed up comparing to the select/ge_scalarmult_base -- sowle
static void select_vartime(ge_precomp *t, int pos, signed char b) {
unsigned char bnegative = negative(b);
unsigned char babs = b - (((-bnegative) & b) << 1);
const ge_precomp* base;
if (babs == 0)
{
ge_precomp_0(t);
}
else if (bnegative == 0)
{
base = &ge_base[pos][babs - 1];
fe_copy(t->yplusx, base->yplusx);
fe_copy(t->yminusx, base->yminusx);
fe_copy(t->xy2d, base->xy2d);
}
else
{
base = &ge_base[pos][babs - 1];
fe_copy(t->yplusx, base->yminusx);
fe_copy(t->yminusx, base->yplusx);
fe_neg(t->xy2d, base->xy2d);
}
}
/*
h = a * B
where a = a[0]+256*a[1]+...+256^31 a[31]
@@ -1679,6 +1705,48 @@ void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
}
}
void ge_scalarmult_base_vartime(ge_p3 *h, const unsigned char *a) {
signed char e[64];
signed char carry;
ge_p1p1 r;
ge_p2 s;
ge_precomp t;
int i;
for (i = 0; i < 32; ++i) {
e[2 * i + 0] = (a[i] >> 0) & 15;
e[2 * i + 1] = (a[i] >> 4) & 15;
}
/* each e[i] is between 0 and 15 */
/* e[63] is between 0 and 7 */
carry = 0;
for (i = 0; i < 63; ++i) {
e[i] += carry;
carry = e[i] + 8;
carry >>= 4;
e[i] -= carry << 4;
}
e[63] += carry;
/* each e[i] is between -8 and 8 */
ge_p3_0(h);
for (i = 1; i < 64; i += 2) {
select_vartime(&t, i / 2, e[i]);
ge_madd(&r, h, &t); ge_p1p1_to_p3(h, &r);
}
ge_p3_dbl(&r, h); ge_p1p1_to_p2(&s, &r);
ge_p2_dbl(&r, &s); ge_p1p1_to_p2(&s, &r);
ge_p2_dbl(&r, &s); ge_p1p1_to_p2(&s, &r);
ge_p2_dbl(&r, &s); ge_p1p1_to_p3(h, &r);
for (i = 0; i < 64; i += 2) {
select_vartime(&t, i / 2, e[i]);
ge_madd(&r, h, &t); ge_p1p1_to_p3(h, &r);
}
}
/* From ge_sub.c */
/*
@@ -2096,6 +2164,43 @@ void ge_scalarmult(ge_p2 *r, const unsigned char *a, const ge_p3 *A) {
}
}
void ge_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A) {
signed char aslide[256];
ge_dsmp Ai; /* A, 3A, 5A, 7A, 9A, 11A, 13A, 15A */
ge_p1p1 t;
ge_p3 u;
ge_p2 r_p2;
int i;
slide(aslide, a);
ge_dsm_precomp(Ai, A);
ge_p2_0(&r_p2);
ge_p2_0(r);
for (i = 255; i >= 0; --i) {
if (aslide[i]) break;
}
for (; i >= 0; --i) {
ge_p2_dbl(&t, &r_p2);
if (aslide[i] > 0) {
ge_p1p1_to_p3(&u, &t);
ge_add(&t, &u, &Ai[aslide[i] / 2]);
}
else if (aslide[i] < 0) {
ge_p1p1_to_p3(&u, &t);
ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
}
if (i != 0)
ge_p1p1_to_p2(&r_p2, &t);
else
ge_p1p1_to_p2(r, &t); // last step
}
}
void ge_scalarmult_p3(ge_p3 *r3, const unsigned char *a, const ge_p3 *A) {
signed char e[64];
int carry, carry2, i;

View File

@@ -118,6 +118,8 @@ void ge_p3_tobytes(unsigned char *, const ge_p3 *);
extern const ge_precomp ge_base[32][8];
void ge_scalarmult_base(ge_p3 *, const unsigned char *);
void ge_scalarmult_base_vartime(ge_p3 *h, const unsigned char *a);
/* From ge_tobytes.c */
void ge_tobytes(unsigned char *, const ge_p2 *);
@@ -129,6 +131,7 @@ void sc_reduce(unsigned char *);
/* New code */
void ge_scalarmult(ge_p2 *, const unsigned char *, const ge_p3 *);
void ge_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A);
void ge_scalarmult_p3(ge_p3 *, const unsigned char *, const ge_p3 *);
void ge_double_scalarmult_precomp_vartime(ge_p2 *, const unsigned char *, const ge_p3 *, const unsigned char *, const ge_dsmp);
void ge_triple_scalarmult_precomp_vartime(ge_p2 *, const unsigned char *, const ge_dsmp, const unsigned char *, const ge_dsmp, const unsigned char *, const ge_dsmp);