viff

view viff/prss.py @ 898:ff249565fa3a

Added Sigurd to the VIFF team!
author Martin Geisler <mg@daimi.au.dk>
date Mon Aug 25 01:19:50 2008 +0200 (3 years ago)
parents 28506572a2ca
children 54c25924882a
line source
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright 2007, 2008 VIFF Development Team.
4 #
5 # This file is part of VIFF, the Virtual Ideal Functionality Framework.
6 #
7 # VIFF is free software: you can redistribute it and/or modify it
8 # under the terms of the GNU Lesser General Public License (LGPL) as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
11 #
12 # VIFF is distributed in the hope that it will be useful, but WITHOUT
13 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
15 # Public License for more details.
16 #
17 # You should have received a copy of the GNU Lesser General Public
18 # License along with VIFF. If not, see <http://www.gnu.org/licenses/>.
20 u"""Methods for pseudo-random secret sharing. Normal Shamir sharing
21 (see the :mod:`viff.shamir` module) requires secure channels between
22 the players for distributing shares. With pseudo-random secret sharing
23 one can share a secret using a single broadcast instead.
25 PRSS relies on each player having access to a set of previously
26 distributed pseudo-random functions (PRFs) --- or rather the seeds for
27 such functions. In VIFF, such seeds are generated by
28 :func:`viff.config.generate_configs`. The
29 :meth:`viff.config.Player.prfs` and
30 :meth:`viff.config.Player.dealer_prfs` methods give access to the
31 PRFs.
33 In this module the function :func:`prss` is used to calculate shares
34 for a pseudo-random number. The :func:`generate_subsets` function is a
35 general utility for generating subsets of a specific size.
37 The code is based on the paper "Share Conversion, Pseudorandom
38 Secret-Sharing and Applications to Secure Computation" by Ronald
39 Cramer, Ivan Damgård, and Yuval Ishai in Proc. of TCC 2005, LNCS 3378.
40 `Download <http://www.cs.technion.ac.il/~yuvali/pubs/CDI05.ps>`__.
41 """
43 __docformat__ = "restructuredtext"
46 import sha
47 from math import ceil
48 from struct import pack
49 from binascii import hexlify
51 from gmpy import numdigits
53 from viff import shamir
54 from viff.field import GF256
56 def random_replicated_sharing(j, prfs, key):
57 """Return a replicated sharing of a random number.
59 The shares are for player *j* based on the pseudo-random functions
60 given in *prfs* (a mapping from subsets of players to :class:`PRF`
61 instances). The *key* is used when evaluating the PRFs. The result
62 is a list of ``(subset, share)`` pairs.
63 """
64 # The PRFs contain the subsets we need, plus some extra in the
65 # case of dealer_keys. That is why we have to check that j is in
66 # the subset before using it.
67 return [(s, prf(key)) for (s, prf) in prfs.iteritems() if j in s]
69 def convert_replicated_shamir(n, j, field, rep_shares):
70 """Convert a set of replicated shares to a Shamir share.
72 The conversion is done for player *j* (out of *n*) and will be
73 done over *field*.
74 """
75 result = 0
76 all = frozenset(range(1, n+1))
77 for subset, share in rep_shares:
78 # TODO: should we cache the f_in_j values?
79 points = [(field(x), 0) for x in all-subset]
80 points.append((0, 1))
81 f_in_j = shamir.recombine(points, j)
82 result += share * f_in_j
83 return result
85 def prss(n, j, field, prfs, key):
86 """Return a pseudo-random secret share for a random number.
88 The share is for player *j* based on the pseudo-random functions
89 given in *prfs* (a mapping from subsets of players to :class:`PRF`
90 instances). The *key* is used when evaluating the PRFs.
92 An example with (n,t) = (3,1) and a modulus of 31:
94 >>> from field import GF
95 >>> Zp = GF(31)
96 >>> prfs = {frozenset([1,2]): PRF("a", 31),
97 ... frozenset([1,3]): PRF("b", 31),
98 ... frozenset([2,3]): PRF("c", 31)}
99 >>> prss(3, 1, Zp, prfs, "key")
100 {22}
101 >>> prss(3, 2, Zp, prfs, "key")
102 {20}
103 >>> prss(3, 3, Zp, prfs, "key")
104 {18}
106 We see that the sharing is consistent because each subset of two
107 players will recombine their shares to ``{24}``.
108 """
109 rep_shares = random_replicated_sharing(j, prfs, key)
110 return convert_replicated_shamir(n, j, field, rep_shares)
112 def prss_lsb(n, j, field, prfs, key):
113 """Share a pseudo-random number and its least significant bit.
115 The random number is shared over *field* and its least significant
116 bit is shared over :class:`viff.field.GF256`. It is important the
117 *prfs* generate numbers much less than the size of *field* -- we
118 must be able to do an addition for each PRF without overflow in
119 *field*.
121 >>> from field import GF
122 >>> Zp = GF(23)
123 >>> prfs = {frozenset([1,2]): PRF("a", 7),
124 ... frozenset([1,3]): PRF("b", 7),
125 ... frozenset([2,3]): PRF("c", 7)}
126 >>> prss_lsb(3, 1, Zp, prfs, "key")
127 ({0}, [140])
128 >>> prss_lsb(3, 2, Zp, prfs, "key")
129 ({15}, [3])
130 >>> prss_lsb(3, 3, Zp, prfs, "key")
131 ({7}, [143])
133 We see that the random value must be ``{8}`` and so the least
134 significant bit must be ``[0]``. We can check this by recombining
135 any two of the three shares:
137 >>> from shamir import recombine
138 >>> recombine([(GF256(1), GF256(140)), (GF256(2), GF256(3))])
139 [0]
140 >>> recombine([(GF256(2), GF256(3)), (GF256(3), GF256(143))])
141 [0]
142 >>> recombine([(GF256(3), GF256(143)), (GF256(1), GF256(140))])
143 [0]
144 """
145 rep_shares = random_replicated_sharing(j, prfs, key)
146 lsb_shares = [(s, r & 1) for (s, r) in rep_shares]
147 return (convert_replicated_shamir(n, j, field, rep_shares),
148 convert_replicated_shamir(n, j, GF256, lsb_shares))
150 def prss_zero(n, t, j, field, prfs, key):
151 """Return a pseudo-random secret zero-sharing of degree 2t.
153 >>> from field import GF
154 >>> Zp = GF(23)
155 >>> prfs = {frozenset([1,2]): PRF("a", 7),
156 ... frozenset([1,3]): PRF("b", 7),
157 ... frozenset([2,3]): PRF("c", 7)}
158 >>> prss_zero(3, 1, 1, Zp, prfs, "key")
159 {4}
160 >>> prss_zero(3, 1, 2, Zp, prfs, "key")
161 {0}
162 >>> prss_zero(3, 1, 3, Zp, prfs, "key")
163 {11}
165 If we recombine 2t + 1 = 3 shares we can verify that this is
166 indeed a zero-sharing:
168 >>> from shamir import recombine
169 >>> recombine([(Zp(1), Zp(4)), (Zp(2), Zp(0)), (Zp(3), Zp(11))])
170 {0}
171 """
172 # We start by generating t random numbers for each subset. This is
173 # very similar to calling random_replicated_sharing t times, but
174 # by doing it like this we immediatedly get the nesting we want.
175 rep_shares = [(s, [(i+1, prf((key, i))) for i in range(t)])
176 for (s, prf) in prfs.iteritems() if j in s]
178 # We then proceed with the zero-sharing. The first part is like in
179 # a normal PRSS.
180 result = 0
181 all = frozenset(range(1, n+1))
182 for subset, shares in rep_shares:
183 points = [(field(x), 0) for x in all-subset]
184 points.append((0, 1))
185 f_in_j = shamir.recombine(points, j)
186 # Unlike a normal PRSS we have an inner sum where we use a
187 # degree 2t polynomial g_i which we choose as
189 # g_i(x) = f(x) * x**j
191 # since we already have the degree t polynomial f at hand. The
192 # g_i are all linearly independent as required by the protocol
193 # and can thus be used for the zero-sharing.
194 for i, share in shares:
195 g_i_in_j = f_in_j * j**i
196 result += share * g_i_in_j
197 return result
199 def generate_subsets(orig_set, size):
200 """Generates the set of all subsets of a specific size.
202 >>> generate_subsets(frozenset('abc'), 2)
203 frozenset([frozenset(['c', 'b']), frozenset(['a', 'c']), frozenset(['a', 'b'])])
205 Generating subsets larger than the initial set return the empty
206 set:
208 >>> generate_subsets(frozenset('a'), 2)
209 frozenset([])
210 """
211 if len(orig_set) > size:
212 result = set()
213 for element in orig_set:
214 result.update(generate_subsets(orig_set - set([element]), size))
215 return frozenset(result)
216 elif len(orig_set) == size:
217 return frozenset([orig_set])
218 else:
219 return frozenset()
221 # Generating 100,000 bytes like this:
223 # x = PRF("a", 256)
224 # for i in xrange(100000):
225 # sys.stdout.write(chr(x(i)))
227 # and piping them into ent (http://www.fourmilab.ch/random/) gives:
229 # Entropy = 7.998124 bits per byte.
231 # Optimum compression would reduce the size
232 # of this 100000 byte file by 0 percent.
234 # Chi square distribution for 100000 samples is 260.10, and randomly
235 # would exceed this value 50.00 percent of the times.
237 # Arithmetic mean value of data bytes is 127.6850 (127.5 = random).
238 # Monte Carlo value for Pi is 3.156846274 (error 0.49 percent).
239 # Serial correlation coefficient is 0.000919 (totally uncorrelated = 0.0).
240 class PRF(object):
241 """Models a pseudo random function (a PRF).
243 The numbers are based on a SHA1 hash of the initial key.
245 Each PRF is created based on a key (which should be random and
246 secret) and a maximum (which may be public):
248 >>> f = PRF("some random key", 256)
250 Calling f return values between zero and the given maximum:
252 >>> f(1)
253 246L
254 >>> f(2)
255 254L
256 >>> f(3)
257 13L
258 """
260 def __init__(self, key, max):
261 """Create a PRF keyed with the given key and max.
263 The key must be a string whereas the max must be a number.
264 Output value will be in the range zero to max, with zero
265 included and max excluded.
267 To make a PRF what generates numbers less than 1000 do:
269 >>> f = PRF("key", 1000)
271 The PRF can be evaluated by calling it on some input:
273 >>> f("input")
274 327L
276 Creating another PRF with the same key gives identical results
277 since f and g are deterministic functions, depending only on
278 the key:
280 >>> g = PRF("key", 1000)
281 >>> g("input")
282 327L
284 We can test that f and g behave the same on many inputs:
286 >>> [f(i) for i in range(100)] == [g(i) for i in range(100)]
287 True
289 Both the key and the max is used when the PRF is keyed. This
290 means that
292 >>> f = PRF("key", 1000)
293 >>> g = PRF("key", 10000)
294 >>> [f(i) for i in range(100)] == [g(i) for i in range(100)]
295 False
296 """
297 self.max = max
299 # Number of bits needed for a number in the range [0, max-1].
300 bit_length = numdigits(max-1, 2)
302 # Number of whole digest blocks needed.
303 blocks = int(ceil(bit_length / 8.0 / sha.digest_size))
305 # Number of whole bytes needed.
306 self.bytes = int(ceil(bit_length / 8.0))
307 # Number of bits needed from the final byte.
308 self.bits = bit_length % 8
310 self.sha1s = []
311 for i in range(blocks):
312 # TODO: this construction is completely ad-hoc and not
313 # known to be secure...
315 # Initial seed is key + str(max). The maximum is included
316 # since we want PRF("input", 100) and PRF("input", 1000)
317 # to generate different output.
318 seed = key + str(max)
320 # The i'th generator is seeded with H^i(key + str(max))
321 # where H^i means repeated hashing i times.
322 for _ in range(i):
323 seed = sha.new(seed).digest()
324 self.sha1s.append(sha.new(seed))
326 def __call__(self, input):
327 """Return a number based on input.
329 If the input is not already a string, it is hashed (using the
330 normal Python hash built-in) and the hash value is used
331 instead. The hash value is a 32 bit value, so a string should
332 be given if one wants to evaluate the PRF on more that 2**32
333 different values.
335 >>> prf = PRF("key", 1000)
336 >>> prf(1), prf(2), prf(3)
337 (714L, 80L, 617L)
339 Since prf is a function we can of course evaluate the same
340 input to get the same output:
342 >>> prf(1)
343 714L
345 The prf can take arbitrary input:
347 >>> prf(("input", 123))
348 474L
350 but it must be hashable:
352 >>> prf(["input", 123]) # doctest: +IGNORE_EXCEPTION_DETAIL
353 Traceback (most recent call last):
354 ...
355 TypeError: list objects are unhashable
356 """
357 # We can only feed str data to sha1 instance, so we must
358 # convert the input.
359 if not isinstance(input, str):
360 input = pack("l", hash(input))
362 # There is a chance that we generate a number that is too big,
363 # so we must keep trying until we succeed.
364 while True:
365 # We collect a digest for each keyed sha1 instance.
366 digests = []
367 for sha1 in self.sha1s:
368 # Must work on a copy of the keyed sha1 instance.
369 copy = sha1.copy()
370 copy.update(input)
371 digests.append(copy.digest())
373 digest = ''.join(digests)
374 random_bytes = digest[:self.bytes]
376 # Convert the random bytes to a long by converting it to
377 # hexadecimal representation first.
378 result = long(hexlify(random_bytes), 16)
380 # Shift to get rid of the surplus bits (if needed).
381 if self.bits:
382 result >>= (8 - self.bits)
384 if result < self.max:
385 return result
386 else:
387 # TODO: is this safe? The first idea was to append a
388 # fixed string (".") every time, but that makes f("a")
389 # and f("a.") return the same number.
391 # The final byte of the digest depends on the key
392 # which means that it should not be possible to
393 # predict it and so it should be hard to find pairs of
394 # inputs which give the same output value.
395 input += digest[-1]
397 if __name__ == "__main__":
398 import doctest #pragma NO COVER
399 doctest.testmod() #pragma NO COVER