Speed up crypto derivations with vartime ops

Implemented ge_scalarmult_vartime (a*B) and ge_scalarmult_base_vartime (a*G). Both are derived from libsodium code, like the rest of crypto-ops.c.
This commit is contained in:
WeebDataHoarder
2025-09-29 13:19:51 +02:00
committed by SChernykh
parent 7cb6a0dcc3
commit 22cabb5ac9
3 changed files with 113 additions and 5 deletions

View File

@@ -1628,6 +1628,32 @@ static void select(ge_precomp *t, int pos, signed char b) {
ge_precomp_cmov(t, &minust, bnegative);
}
// With select_vartime/ge_scalarmult_base_vartime I got a ~25% speedup compared to select/ge_scalarmult_base -- sowle
/*
 * Variable-time table lookup: writes into t the precomputed entry for
 * digit b at table position pos.
 *
 *   b == 0  ->  t is set with ge_precomp_0()
 *   b  > 0  ->  t = ge_base[pos][b - 1]
 *   b  < 0  ->  t = ge_base[pos][-b - 1] with yplusx/yminusx swapped and
 *               xy2d negated
 *
 * The callers in this file pass digits in the range -8..8, which matches
 * the 8 entries per position of ge_base.
 *
 * This function branches on b, so its running time depends on the digit.
 * The absolute value is therefore taken with an ordinary comparison; the
 * branch-free form used by the constant-time select() left-shifts a
 * negative int, which is undefined behaviour in C, and gains nothing here.
 */
static void select_vartime(ge_precomp *t, int pos, signed char b) {
  const ge_precomp *base;
  const int babs = (b < 0) ? -b : b;

  if (babs == 0) {
    ge_precomp_0(t);
    return;
  }

  base = &ge_base[pos][babs - 1];
  if (b > 0) {
    fe_copy(t->yplusx, base->yplusx);
    fe_copy(t->yminusx, base->yminusx);
    fe_copy(t->xy2d, base->xy2d);
  } else {
    /* Negative digit: swap the y+x / y-x coordinates and negate xy2d. */
    fe_copy(t->yplusx, base->yminusx);
    fe_copy(t->yminusx, base->yplusx);
    fe_neg(t->xy2d, base->xy2d);
  }
}
/*
h = a * B
where a = a[0]+256*a[1]+...+256^31 a[31]
@@ -1679,6 +1705,48 @@ void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
}
}
/*
 * Variable-time scalar multiplication by the table in ge_base:
 * h = a * B, with a given as 32 little-endian bytes.
 *
 * Same structure as ge_scalarmult_base, but table entries are fetched with
 * select_vartime(), so the running time depends on the scalar.
 */
void ge_scalarmult_base_vartime(ge_p3 *h, const unsigned char *a) {
  signed char digits[64];
  signed char c = 0;
  ge_p1p1 sum;
  ge_p2 dbl;
  ge_precomp entry;
  int k;

  /* Split every scalar byte into two radix-16 digits, low nibble first. */
  for (k = 0; k < 32; ++k) {
    digits[2 * k] = a[k] & 15;
    digits[2 * k + 1] = (a[k] >> 4) & 15;
  }
  /* Every digit is now between 0 and 15. */
  /* NOTE(review): the original comment states digits[63] is between 0 and 7,
     which presumably requires a[31] <= 127 -- confirm against callers. */

  /* Recode to signed digits by pushing a carry into the next digit. */
  for (k = 0; k < 63; ++k) {
    digits[k] += c;
    c = (signed char)((digits[k] + 8) >> 4);
    digits[k] -= c << 4;
  }
  digits[63] += c;
  /* Every digit is now between -8 and 8. */

  ge_p3_0(h);

  /* Accumulate the odd-indexed digits; k is the table position. */
  for (k = 0; k < 32; ++k) {
    select_vartime(&entry, k, digits[2 * k + 1]);
    ge_madd(&sum, h, &entry);
    ge_p1p1_to_p3(h, &sum);
  }

  /* Four doublings: h = 16 * h. */
  ge_p3_dbl(&sum, h);
  ge_p1p1_to_p2(&dbl, &sum);
  ge_p2_dbl(&sum, &dbl);
  ge_p1p1_to_p2(&dbl, &sum);
  ge_p2_dbl(&sum, &dbl);
  ge_p1p1_to_p2(&dbl, &sum);
  ge_p2_dbl(&sum, &dbl);
  ge_p1p1_to_p3(h, &sum);

  /* Accumulate the even-indexed digits. */
  for (k = 0; k < 32; ++k) {
    select_vartime(&entry, k, digits[2 * k]);
    ge_madd(&sum, h, &entry);
    ge_p1p1_to_p3(h, &sum);
  }
}
/* From ge_sub.c */
/*
@@ -2096,6 +2164,43 @@ void ge_scalarmult(ge_p2 *r, const unsigned char *a, const ge_p3 *A) {
}
}
/*
 * Variable-time scalar multiplication: r = a * A.
 *
 * The scalar is recoded by slide() into 256 signed digits, and the
 * multiples A, 3A, 5A, ..., 15A are precomputed by ge_dsm_precomp().
 * Leading zero digits are skipped and zero digits cost only a doubling,
 * so the running time depends on the scalar.
 *
 * If every digit is zero, r is left as set by ge_p2_0().
 */
void ge_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A) {
  signed char naf[256];
  ge_dsmp odd_multiples; /* A, 3A, 5A, 7A, 9A, 11A, 13A, 15A */
  ge_p1p1 step;
  ge_p3 doubled;
  ge_p2 acc;
  int pos;

  slide(naf, a);
  ge_dsm_precomp(odd_multiples, A);

  ge_p2_0(&acc);
  ge_p2_0(r);

  /* Skip the leading zero digits. */
  pos = 255;
  while (pos >= 0 && naf[pos] == 0) {
    --pos;
  }

  while (pos >= 0) {
    const signed char digit = naf[pos];

    ge_p2_dbl(&step, &acc);
    if (digit > 0) {
      ge_p1p1_to_p3(&doubled, &step);
      ge_add(&step, &doubled, &odd_multiples[digit / 2]);
    } else if (digit < 0) {
      ge_p1p1_to_p3(&doubled, &step);
      ge_sub(&step, &doubled, &odd_multiples[(-digit) / 2]);
    }

    /* The final iteration writes straight into the caller's result. */
    ge_p1p1_to_p2(pos == 0 ? r : &acc, &step);
    --pos;
  }
}
void ge_scalarmult_p3(ge_p3 *r3, const unsigned char *a, const ge_p3 *A) {
signed char e[64];
int carry, carry2, i;

View File

@@ -118,6 +118,8 @@ void ge_p3_tobytes(unsigned char *, const ge_p3 *);
extern const ge_precomp ge_base[32][8];
void ge_scalarmult_base(ge_p3 *, const unsigned char *);
/* Variable-time variant of ge_scalarmult_base: h = a * (base point), with a
   given as 32 bytes. Running time depends on the value of a. */
void ge_scalarmult_base_vartime(ge_p3 *h, const unsigned char *a);
/* From ge_tobytes.c */
void ge_tobytes(unsigned char *, const ge_p2 *);
@@ -129,6 +131,7 @@ void sc_reduce(unsigned char *);
/* New code */
void ge_scalarmult(ge_p2 *, const unsigned char *, const ge_p3 *);
/* Variable-time variant of ge_scalarmult: r = a * A.
   Running time depends on the value of a. */
void ge_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A);
void ge_scalarmult_p3(ge_p3 *, const unsigned char *, const ge_p3 *);
void ge_double_scalarmult_precomp_vartime(ge_p2 *, const unsigned char *, const ge_p3 *, const unsigned char *, const ge_dsmp);
void ge_triple_scalarmult_precomp_vartime(ge_p2 *, const unsigned char *, const ge_dsmp, const unsigned char *, const ge_dsmp, const unsigned char *, const ge_dsmp);

View File

@@ -89,7 +89,7 @@ void generate_keys(hash& pub, hash& sec)
} while (!sc_isnonzero(sec.h));
ge_p3 point;
ge_scalarmult_base(&point, sec.h);
ge_scalarmult_base_vartime(&point, sec.h);
ge_p3_tobytes(pub.h, &point);
}
@@ -113,7 +113,7 @@ void generate_keys_deterministic(hash& pub, hash& sec, const uint8_t* entropy, s
} while (!sc_isnonzero(sec.h));
ge_p3 point;
ge_scalarmult_base(&point, sec.h);
ge_scalarmult_base_vartime(&point, sec.h);
ge_p3_tobytes(pub.h, &point);
}
@@ -125,7 +125,7 @@ bool check_keys(const hash& pub, const hash& sec)
}
ge_p3 point;
ge_scalarmult_base(&point, sec.h);
ge_scalarmult_base_vartime(&point, sec.h);
hash pub_check;
ge_p3_tobytes(pub_check.h, &point);
@@ -206,7 +206,7 @@ public:
return false;
}
ge_scalarmult(&point2, key2.h, &point);
ge_scalarmult_vartime(&point2, key2.h, &point);
ge_mul8(&point3, &point2);
ge_p1p1_to_p2(&point2, &point3);
ge_tobytes(reinterpret_cast<uint8_t*>(&derivation), &point2);
@@ -253,7 +253,7 @@ public:
}
derivation_to_scalar(derivation, output_index, scalar);
ge_scalarmult_base(&point2, reinterpret_cast<uint8_t*>(&scalar));
ge_scalarmult_base_vartime(&point2, reinterpret_cast<uint8_t*>(&scalar));
ge_p3_to_cached(&point3, &point2);
ge_add(&point4, &point1, &point3);
ge_p1p1_to_p2(&point5, &point4);