X-Git-Url: http://www.git.cypherpunks.ru/?p=pyderasn.git;a=blobdiff_plain;f=pyderasn.py;h=6d1e7eb736235ec29d37853a0b418525a2e16599;hp=4e3028ca0cad2314cbd088e1097723eb0ef362fb;hb=3485617cd8e158154eae89c0ef2258e7528e2b3b;hpb=eb9768b4e6862f29f870cb928e9a922dfec3a684 diff --git a/pyderasn.py b/pyderasn.py index 4e3028c..6d1e7eb 100755 --- a/pyderasn.py +++ b/pyderasn.py @@ -135,6 +135,8 @@ example ``TBSCertificate`` sequence holds defaulted, explicitly tagged When default argument is used and value is not specified, then it equals to default one. +.. _bounds: + Size constraints ________________ @@ -164,8 +166,10 @@ then :py:exc:`pyderasn.ObjNotReady` exception will be raised. All objects have ``copy()`` method, returning its copy, that can be safely mutated. +.. _decoding: + Decoding -________ +-------- Decoding is performed using ``decode()`` method. ``offset`` optional argument could be used to set initial object's offset in the binary @@ -191,8 +195,10 @@ lesser than ``offset``), ``expl_tlen``, ``expl_llen``, ``expl_vlen`` When error occurs, then :py:exc:`pyderasn.DecodeError` is raised. +.. _pprinting: + Pretty printing -_______________ +--------------- All objects have ``pps()`` method, that is a generator of :py:class:`pyderasn.PP` namedtuple, holding various raw information @@ -209,6 +215,118 @@ all object ``repr``. But it is easy to write custom formatters. >>> print(pprint(obj)) 0 [1,1, 2] INTEGER -12345 +.. _definedby: + +DEFINED BY +---------- + +ASN.1 structures often have ANY and OCTET STRING fields, that are +DEFINED BY some previously met ObjectIdentifier. This library provides +ability to specify mapping between some OID and field that must be +decoded with specific specification. + +defines kwarg +_____________ + +:py:class:`pyderasn.ObjectIdentifier` field inside +:py:class:`pyderasn.Sequence` can hold mapping between OIDs and +the structures necessary for decoding. 
For example, CMS (:rfc:`5652`) +container:: + + class ContentInfo(Sequence): + schema = ( + ("contentType", ContentType(defines=("content", { + id_digestedData: DigestedData(), + id_signedData: SignedData(), + }))), + ("content", Any(expl=tag_ctxc(0))), + ) + +``contentType`` field tells that it defines that ``content`` must be +decoded with ``SignedData`` specification, if ``contentType`` equals to +``id-signedData``. The same applies to ``DigestedData``. If +``contentType`` contains unknown OID, then no automatic decoding is +done. + +Following types can be automatically decoded (DEFINED BY): + +* :py:class:`pyderasn.Any` +* :py:class:`pyderasn.OctetString` +* :py:class:`pyderasn.SequenceOf`/:py:class:`pyderasn.SetOf` + ``Any``/``OctetString``-s + +When any of those fields is automatically decoded, then ``.defined`` +attribute contains ``(OID, value)`` tuple. OID tells by which OID it was +defined, ``value`` contains corresponding decoded value. For the example +above, ``content_info["content"].defined == (id_signedData, +signed_data)``. + +.. _defines_by_path_kwarg: + +defines_by_path kwarg +_____________________ + +Sometimes you either can not or do not want to explicitly set *defines* +in the schema. You can dynamically apply those definitions when calling +``.decode()`` method. + +Decode method takes optional ``defines_by_path`` keyword argument that +must be sequence of following tuples:: + + (decode_path, defines) + +where ``decode_path`` is a tuple holding so-called decode path to the +exact :py:class:`pyderasn.ObjectIdentifier` field you want to apply +``defines``, holding exactly the same value as accepted in its keyword +argument. + +For example, again for CMS, you want to automatically decode +``SignedData`` and CMC's (:rfc:`5272`) ``PKIData`` and ``PKIResponse`` +structures it may hold. 
Also, automatically decode ``controlSequence`` +of ``PKIResponse``:: + + content_info, tail = ContentInfo().decode(data, defines_by_path=( + ( + ("contentType",), + ("content", {id_signedData: SignedData()}), + ), + ( + ( + "content", + decode_path_defby(id_signedData), + "encapContentInfo", + "eContentType", + ), + ("eContent", { + id_cct_PKIData: PKIData(), + id_cct_PKIResponse: PKIResponse(), + }), + ), + ( + ( + "content", + decode_path_defby(id_signedData), + "encapContentInfo", + "eContent", + decode_path_defby(id_cct_PKIResponse), + "controlSequence", + any, + "attrType", + ), + ("attrValues", { + id_cmc_recipientNonce: RecipientNonce(), + id_cmc_senderNonce: SenderNonce(), + id_cmc_statusInfoV2: CMCStatusInfoV2(), + id_cmc_transactionId: TransactionId(), + }), + ), + )) + +Pay attention to :py:func:`pyderasn.decode_path_defby` and ``any``. +The first function is useful for path construction when some automatic +decoding is already done. ``any`` is used for human readability and +means literally any value it meets -- useful for ``SequenceOf`` and ``SetOf``. 
+ Primitive types --------------- @@ -338,6 +456,7 @@ __all__ = ( "Boolean", "BoundsError", "Choice", + "decode_path_defby", "DecodeError", "Enumerated", "GeneralizedTime", @@ -747,7 +866,7 @@ class Obj(object): def _encode(self): # pragma: no cover raise NotImplementedError() - def _decode(self, tlv, offset=0, decode_path=()): # pragma: no cover + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): # pragma: no cover raise NotImplementedError() def encode(self): @@ -756,13 +875,14 @@ class Obj(object): return raw return b"".join((self._expl, len_encode(len(raw)), raw)) - def decode(self, data, offset=0, leavemm=False, decode_path=()): + def decode(self, data, offset=0, leavemm=False, decode_path=(), defines_by_path=None): """Decode the data :param data: either binary or memoryview :param int offset: initial data's offset :param bool leavemm: do we need to leave memoryview of remaining data as is, or convert it to bytes otherwise + :param defines_by_path: :ref:`Read about DEFINED BY <definedby>` :returns: (Obj, remaining data) """ tlv = memoryview(data) @@ -771,6 +891,7 @@ class Obj(object): tlv, offset, decode_path=decode_path, + defines_by_path=defines_by_path, ) else: try: @@ -807,7 +928,8 @@ class Obj(object): obj, tail = self._decode( v, offset=offset + tlen + llen, - decode_path=(), + decode_path=decode_path, + defines_by_path=defines_by_path, ) return obj, (tail if leavemm else tail.tobytes()) @@ -840,6 +962,12 @@ class Obj(object): return self.expl_tlen + self.expl_llen + self.expl_vlen +def decode_path_defby(defined_by): + """DEFINED BY representation inside decode path + """ + return "DEFINED BY (%s)" % defined_by + + ######################################################################## # Pretty printing ######################################################################## @@ -1109,7 +1237,7 @@ class Boolean(Obj): (b"\xFF" if self._value else b"\x00"), )) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, 
decode_path=(), defines_by_path=None): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -1402,7 +1530,7 @@ class Integer(Obj): break return b"".join((self.tag, len_encode(len(octets)), octets)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -1735,7 +1863,7 @@ class BitString(Obj): octets, )) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -1856,7 +1984,7 @@ class OctetString(Obj): >>> OctetString(b"hell", bounds=(4, 4)) OCTET STRING 4 bytes 68656c6c """ - __slots__ = ("_bound_min", "_bound_max") + __slots__ = ("_bound_min", "_bound_max", "defined") tag_default = tag_encode(4) asn1_type_name = "OCTET STRING" @@ -1904,6 +2032,7 @@ class OctetString(Obj): ) if self._value is None: self._value = default + self.defined = None def _value_sanitize(self, value): if issubclass(value.__class__, OctetString): @@ -1981,7 +2110,7 @@ class OctetString(Obj): self._value, )) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -2056,6 +2185,11 @@ class OctetString(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, ) + defined_by, defined = self.defined or (None, None) + if defined_by is not None: + yield defined.pps( + decode_path=decode_path + (decode_path_defby(defined_by),) + ) class Null(Obj): @@ -2125,7 +2259,7 @@ class Null(Obj): def _encode(self): return self.tag + len_encode(0) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -2205,13 +2339,14 @@ class 
ObjectIdentifier(Obj): Traceback (most recent call last): pyderasn.InvalidOID: unacceptable first arc value """ - __slots__ = () + __slots__ = ("defines",) tag_default = tag_encode(6) asn1_type_name = "OBJECT IDENTIFIER" def __init__( self, value=None, + defines=None, impl=None, expl=None, default=None, @@ -2222,6 +2357,13 @@ class ObjectIdentifier(Obj): :param value: set the value. Either tuples of integers, string of "."-concatenated integers, or :py:class:`pyderasn.ObjectIdentifier` object + :param defines: tuple of two elements. First one is a name of + field inside :py:class:`pyderasn.Sequence`, + defined by that OID. Second element is a + ``{OID: pyderasn.Obj()}`` dictionary, mapping + between current OID value and structure applied + to defined field. + :ref:`Read about DEFINED BY <definedby>` :param bytes impl: override default tag with ``IMPLICIT`` one :param bytes expl: override default tag with ``EXPLICIT`` one :param default: set default value. Type same as in ``value`` @@ -2246,6 +2388,7 @@ class ObjectIdentifier(Obj): ) if self._value is None: self._value = default + self.defines = defines def __add__(self, their): if isinstance(their, self.__class__): @@ -2283,6 +2426,7 @@ class ObjectIdentifier(Obj): def copy(self): obj = self.__class__() obj._value = self._value + obj.defines = self.defines obj.tag = self.tag obj._expl = self._expl obj.default = self.default @@ -2324,6 +2468,7 @@ class ObjectIdentifier(Obj): def __call__( self, value=None, + defines=None, impl=None, expl=None, default=None, @@ -2331,6 +2476,7 @@ class ObjectIdentifier(Obj): ): return self.__class__( value=value, + defines=self.defines if defines is None else defines, impl=self.tag if impl is None else impl, expl=self._expl if expl is None else expl, default=self.default if default is None else default, @@ -2356,7 +2502,7 @@ class ObjectIdentifier(Obj): v = b"".join(octets) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def 
_decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -3123,7 +3269,7 @@ class Choice(Obj): self._assert_ready() return self._value[1].encode() - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): for choice, spec in self.specs.items(): try: value, tail = spec.decode( @@ -3131,6 +3277,7 @@ class Choice(Obj): offset=offset, leavemm=True, decode_path=decode_path + (choice,), + defines_by_path=defines_by_path, ) except TagMismatch: continue @@ -3219,7 +3366,7 @@ class Any(Obj): >>> hexenc(bytes(a)) b'0x040x0bhello world' """ - __slots__ = () + __slots__ = ("defined",) tag_default = tag_encode(0) asn1_type_name = "ANY" @@ -3240,6 +3387,7 @@ class Any(Obj): """ super(Any, self).__init__(None, expl, None, optional, _decoded) self._value = None if value is None else self._value_sanitize(value) + self.defined = None def _value_sanitize(self, value): if isinstance(value, self.__class__): @@ -3296,7 +3444,7 @@ class Any(Obj): self._assert_ready() return self._value - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, tlen, lv = tag_strip(tlv) l, llen, v = len_decode(lv) @@ -3347,12 +3495,30 @@ class Any(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, ) + defined_by, defined = self.defined or (None, None) + if defined_by is not None: + yield defined.pps( + decode_path=decode_path + (decode_path_defby(defined_by),) + ) ######################################################################## # ASN.1 constructed types ######################################################################## +def get_def_by_path(defines_by_path, sub_decode_path): + """Get define by decode path + """ + for path, define in defines_by_path: + if len(path) != len(sub_decode_path): + continue + for p1, p2 in zip(path, 
sub_decode_path): + if (p1 != any) and (p1 != p2): + break + else: + return define + + class Sequence(Obj): """``SEQUENCE`` structure type @@ -3566,7 +3732,7 @@ class Sequence(Obj): v = b"".join(self._encoded_values()) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, tlen, lv = tag_strip(tlv) except DecodeError as err: @@ -3601,20 +3767,65 @@ class Sequence(Obj): v, tail = v[:l], v[l:] sub_offset = offset + tlen + llen values = {} + defines = {} for name, spec in self.specs.items(): if len(v) == 0 and spec.optional: continue + sub_decode_path = decode_path + (name,) try: value, v_tail = spec.decode( v, sub_offset, leavemm=True, - decode_path=decode_path + (name,), + decode_path=sub_decode_path, + defines_by_path=defines_by_path, ) except TagMismatch: if spec.optional: continue raise + + defined = defines.pop(name, None) + if defined is not None: + defined_by, defined_spec = defined + if issubclass(value.__class__, SequenceOf): + for i, _value in enumerate(value): + sub_sub_decode_path = sub_decode_path + ( + str(i), + decode_path_defby(defined_by), + ) + defined_value, defined_tail = defined_spec.decode( + memoryview(bytes(_value)), + sub_offset + value.tlen + value.llen, + leavemm=True, + decode_path=sub_sub_decode_path, + defines_by_path=defines_by_path, + ) + if len(defined_tail) > 0: + raise DecodeError( + "remaining data", + klass=self.__class__, + decode_path=sub_sub_decode_path, + offset=offset, + ) + _value.defined = (defined_by, defined_value) + else: + defined_value, defined_tail = defined_spec.decode( + memoryview(bytes(value)), + sub_offset + value.tlen + value.llen, + leavemm=True, + decode_path=sub_decode_path + (decode_path_defby(defined_by),), + defines_by_path=defines_by_path, + ) + if len(defined_tail) > 0: + raise DecodeError( + "remaining data", + klass=self.__class__, + decode_path=sub_decode_path + 
(decode_path_defby(defined_by),), + offset=offset, + ) + value.defined = (defined_by, defined_value) + sub_offset += (value.expl_tlvlen if value.expled else value.tlvlen) v = v_tail if spec.default is not None and value == spec.default: @@ -3622,6 +3833,15 @@ class Sequence(Obj): # but we allow that anyway continue values[name] = value + + spec_defines = getattr(spec, "defines", None) + if defines_by_path is not None and spec_defines is None: + spec_defines = get_def_by_path(defines_by_path, sub_decode_path) + if spec_defines is not None: + what, schema = spec_defines + defined = schema.get(value, None) + if defined is not None: + defines[what] = (value, defined) if len(v) > 0: raise DecodeError( "remaining data", @@ -3690,7 +3910,7 @@ class Set(Sequence): v = b"".join(raws) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, tlen, lv = tag_strip(tlv) except DecodeError as err: @@ -3733,6 +3953,7 @@ class Set(Sequence): sub_offset, leavemm=True, decode_path=decode_path + (name,), + defines_by_path=defines_by_path, ) except TagMismatch: continue @@ -3942,7 +4163,7 @@ class SequenceOf(Obj): v = b"".join(self._encoded_values()) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset=0, decode_path=(), defines_by_path=None): try: t, tlen, lv = tag_strip(tlv) except DecodeError as err: @@ -3984,6 +4205,7 @@ class SequenceOf(Obj): sub_offset, leavemm=True, decode_path=decode_path + (str(len(_value)),), + defines_by_path=defines_by_path, ) sub_offset += (value.expl_tlvlen if value.expled else value.tlvlen) v = v_tail @@ -4062,6 +4284,47 @@ def obj_by_path(pypath): # pragma: no cover return obj +def generic_decoder(): # pragma: no cover + # All of this below is a big hack with self references + choice = PrimitiveTypes() + choice.specs["SequenceOf"] = 
SequenceOf(schema=choice) + choice.specs["SetOf"] = SetOf(schema=choice) + for i in range(31): + choice.specs["SequenceOf%d" % i] = SequenceOf( + schema=choice, + expl=tag_ctxc(i), + ) + choice.specs["Any"] = Any() + + # Class name equals to type name, to omit it from output + class SEQUENCEOF(SequenceOf): + __slots__ = () + schema = choice + + def pprint_any(obj, oids=None): + def _pprint_pps(pps): + for pp in pps: + if hasattr(pp, "_fields"): + if pp.asn1_type_name == Choice.asn1_type_name: + continue + pp_kwargs = pp._asdict() + pp_kwargs["decode_path"] = pp.decode_path[:-1] + (">",) + pp = _pp(**pp_kwargs) + yield pp_console_row( + pp, + oids=oids, + with_offsets=True, + with_blob=False, + ) + for row in pp_console_blob(pp): + yield row + else: + for row in _pprint_pps(pp): + yield row + return "\n".join(_pprint_pps(obj.pps())) + return SEQUENCEOF(), pprint_any + + def main(): # pragma: no cover import argparse parser = argparse.ArgumentParser(description="PyDERASN ASN.1 DER decoder") @@ -4073,6 +4336,10 @@ def main(): # pragma: no cover "--schema", help="Python path to schema definition to use", ) + parser.add_argument( + "--defines-by-path", + help="Python path to decoder's defines_by_path", + ) parser.add_argument( "DERFile", type=argparse.FileType("rb"), @@ -4087,46 +4354,14 @@ def main(): # pragma: no cover from functools import partial pprinter = partial(pprint, big_blobs=True) else: - # All of this below is a big hack with self references - choice = PrimitiveTypes() - choice.specs["SequenceOf"] = SequenceOf(schema=choice) - choice.specs["SetOf"] = SetOf(schema=choice) - for i in range(31): - choice.specs["SequenceOf%d" % i] = SequenceOf( - schema=choice, - expl=tag_ctxc(i), - ) - choice.specs["Any"] = Any() - - # Class name equals to type name, to omit it from output - class SEQUENCEOF(SequenceOf): - __slots__ = () - schema = choice - schema = SEQUENCEOF() - - def pprint_any(obj, oids=None): - def _pprint_pps(pps): - for pp in pps: - if hasattr(pp, 
"_fields"): - if pp.asn1_type_name == Choice.asn1_type_name: - continue - pp_kwargs = pp._asdict() - pp_kwargs["decode_path"] = pp.decode_path[:-1] + (">",) - pp = _pp(**pp_kwargs) - yield pp_console_row( - pp, - oids=oids, - with_offsets=True, - with_blob=False, - ) - for row in pp_console_blob(pp): - yield row - else: - for row in _pprint_pps(pp): - yield row - return "\n".join(_pprint_pps(obj.pps())) - pprinter = pprint_any - obj, tail = schema().decode(der) + schema, pprinter = generic_decoder() + obj, tail = schema().decode( + der, + defines_by_path=( + None if args.defines_by_path is None + else obj_by_path(args.defines_by_path) + ), + ) print(pprinter(obj, oids=oids)) if tail != b"": print("\nTrailing data: %s" % hexenc(tail))