]> Cypherpunks.ru repositories - pyderasn.git/blobdiff - pyderasn.py
No magic 0x80 constant for length indefinite
[pyderasn.git] / pyderasn.py
index ffc788ca6cca7a8529cfa1a3ca93abc57d414675..b668507f3a46f9cccdf8d1cfbe3f97ae1cde9826 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-# PyDERASN -- Python ASN.1 DER codec with abstract structures
+# PyDERASN -- Python ASN.1 DER/BER codec with abstract structures
 # Copyright (C) 2017-2018 Sergey Matveev <stargrave@stargrave.org>
 #
 # This program is free software: you can redistribute it and/or modify
 # You should have received a copy of the GNU Lesser General Public
 # License along with this program.  If not, see
 # <http://www.gnu.org/licenses/>.
-"""Python ASN.1 DER codec with abstract structures
+"""Python ASN.1 DER/BER codec with abstract structures
 
-This library allows you to marshal and unmarshal various structures in
-ASN.1 DER format, like this:
+This library allows you to marshal various structures in ASN.1 DER
+format, unmarshal them in BER/CER/DER ones.
 
     >>> i = Integer(123)
     >>> raw = i.encode()
@@ -193,7 +193,7 @@ explicit tag. If you want to know information about it, then use:
 lesser than ``offset``), ``expl_tlen``, ``expl_llen``, ``expl_vlen``
 (that actually equals to ordinary ``tlvlen``).
 
-When error occurs, then :py:exc:`pyderasn.DecodeError` is raised.
+When error occurs, :py:exc:`pyderasn.DecodeError` is raised.
 
 .. _ctx:
 
@@ -206,6 +206,7 @@ decoding process.
 
 Currently available context options:
 
+* :ref:`bered <bered_ctx>`
 * :ref:`defines_by_path <defines_by_path_ctx>`
 * :ref:`strict_default_existence <strict_default_existence_ctx>`
 
@@ -363,6 +364,29 @@ First function is useful for path construction when some automatic
 decoding is already done. ``any`` means literally any value it meet --
 useful for SEQUENCE/SET OF-s.
 
+.. _bered_ctx:
+
+BER encoding
+------------
+
+By default PyDERASN accepts only DER encoded data. It always encodes to
+DER. But you can optionally enable BER decoding with setting ``bered``
+:ref:`context <ctx>` argument to True. Indefinite lengths and
+constructed primitive types should be parsed successfully.
+
+* If object is encoded in BER form (not the DER one), then ``bered``
+  attribute is set to True. Only ``BOOLEAN``, ``BIT STRING``, ``OCTET
+  STRING`` can contain it.
+* If object has an indefinite length encoding, then its ``lenindef``
+  attribute is set to True. Only ``BIT STRING``, ``OCTET STRING``,
+  ``SEQUENCE``, ``SET``, ``SEQUENCE OF``, ``SET OF``, ``ANY`` can
+  contain it.
+* If object has an indefinite length encoded explicit tag, then
+  ``expl_lenindef`` is set to True.
+
+EOC (end-of-contents) token's length is taken in advance in object's
+value length.
+
 Primitive types
 ---------------
 
@@ -404,6 +428,10 @@ CommonString
 ____________
 .. autoclass:: pyderasn.CommonString
 
+NumericString
+_____________
+.. autoclass:: pyderasn.NumericString
+
 UTCTime
 _______
 .. autoclass:: pyderasn.UTCTime
@@ -464,6 +492,17 @@ Various
 .. autofunction:: pyderasn.tag_ctxp
 .. autofunction:: pyderasn.tag_ctxc
 .. autoclass:: pyderasn.Obj
+.. autoclass:: pyderasn.DecodeError
+   :members: __init__
+.. autoclass:: pyderasn.NotEnoughData
+.. autoclass:: pyderasn.LenIndefForm
+.. autoclass:: pyderasn.TagMismatch
+.. autoclass:: pyderasn.InvalidLength
+.. autoclass:: pyderasn.InvalidOID
+.. autoclass:: pyderasn.ObjUnknown
+.. autoclass:: pyderasn.ObjNotReady
+.. autoclass:: pyderasn.InvalidValueType
+.. autoclass:: pyderasn.BoundsError
 """
 
 from codecs import getdecoder
@@ -516,6 +555,7 @@ __all__ = (
     "InvalidOID",
     "InvalidValueType",
     "ISO646String",
+    "LenIndefForm",
     "NotEnoughData",
     "Null",
     "NumericString",
@@ -563,6 +603,7 @@ TagClassReprs = {
 }
 EOC = b"\x00\x00"
 EOC_LEN = len(EOC)
+LENINDEF = b"\x80"  # length indefinite mark
 
 
 ########################################################################
@@ -870,9 +911,7 @@ class Obj(object):
         self.tag = getattr(self, "impl", self.tag_default) if impl is None else impl
         self._expl = getattr(self, "expl", None) if expl is None else expl
         if self.tag != self.tag_default and self._expl is not None:
-            raise ValueError(
-                "implicit and explicit tags can not be set simultaneously"
-            )
+            raise ValueError("implicit and explicit tags can not be set simultaneously")
         if default is not None:
             optional = True
         self.optional = optional
@@ -1014,7 +1053,8 @@ class Obj(object):
                 eoc_expected, tail = tail[:EOC_LEN], tail[EOC_LEN:]
                 if eoc_expected.tobytes() != EOC:
                     raise DecodeError(
-                        msg="no EOC",
+                        "no EOC",
+                        klass=self.__class__,
                         decode_path=decode_path,
                         offset=offset,
                     )
@@ -1883,6 +1923,14 @@ class BitString(Obj):
     ['nonRepudiation', 'keyEncipherment']
     >>> b.specs
     {'nonRepudiation': 1, 'digitalSignature': 0, 'keyEncipherment': 2}
+
+    .. note::
+
+       Pay attention that BIT STRING can be encoded both in primitive
+       and constructed forms. Decoder always checks constructed form tag
+       additionally to specified primitive one. If BER decoding is
+       :ref:`not enabled <bered_ctx>`, then decoder will fail, because
+       of DER restrictions.
     """
     __slots__ = ("tag_constructed", "specs", "defined")
     tag_default = tag_encode(3)
@@ -1958,9 +2006,7 @@ class BitString(Obj):
                         len(value) * 4,
                         hexdec(value + ("" if len(value) % 2 == 0 else "0")),
                     )
-                else:
-                    raise InvalidValueType((self.__class__, string_types, binary_type))
-            elif isinstance(value, binary_type):
+            if isinstance(value, binary_type):
                 return (len(value) * 8, value)
             else:
                 raise InvalidValueType((self.__class__, string_types, binary_type))
@@ -2152,7 +2198,8 @@ class BitString(Obj):
         if t == self.tag_constructed:
             if not ctx.get("bered", False):
                 raise DecodeError(
-                    msg="unallowed BER constructed encoding",
+                    "unallowed BER constructed encoding",
+                    klass=self.__class__,
                     decode_path=decode_path,
                     offset=offset,
                 )
@@ -2171,7 +2218,7 @@ class BitString(Obj):
                     decode_path=decode_path,
                     offset=offset,
                 )
-            if l > 0 and l > len(v):
+            if l > len(v):
                 raise NotEnoughData(
                     "encoded length is longer than data",
                     klass=self.__class__,
@@ -2197,8 +2244,9 @@ class BitString(Obj):
                         break
                     if vlen > l:
                         raise DecodeError(
-                            msg="chunk out of bounds",
-                            decode_path=len(chunks) - 1,
+                            "chunk out of bounds",
+                            klass=self.__class__,
+                            decode_path=decode_path + (str(len(chunks) - 1),),
                             offset=chunks[-1].offset,
                         )
                 sub_decode_path = decode_path + (str(len(chunks)),)
@@ -2212,7 +2260,8 @@ class BitString(Obj):
                     )
                 except TagMismatch:
                     raise DecodeError(
-                        msg="expected BitString encoded chunk",
+                        "expected BitString encoded chunk",
+                        klass=self.__class__,
                         decode_path=sub_decode_path,
                         offset=sub_offset,
                     )
@@ -2222,7 +2271,8 @@ class BitString(Obj):
                 v = v_tail
             if len(chunks) == 0:
                 raise DecodeError(
-                    msg="no chunks",
+                    "no chunks",
+                    klass=self.__class__,
                     decode_path=decode_path,
                     offset=offset,
                 )
@@ -2231,7 +2281,8 @@ class BitString(Obj):
             for chunk_i, chunk in enumerate(chunks[:-1]):
                 if chunk.bit_len % 8 != 0:
                     raise DecodeError(
-                        msg="BitString chunk is not multiple of 8 bit",
+                        "BitString chunk is not multiple of 8 bits",
+                        klass=self.__class__,
                         decode_path=decode_path + (str(chunk_i),),
                         offset=chunk.offset,
                     )
@@ -2313,6 +2364,14 @@ class OctetString(Obj):
     pyderasn.BoundsError: unsatisfied bounds: 4 <= 5 <= 4
     >>> OctetString(b"hell", bounds=(4, 4))
     OCTET STRING 4 bytes 68656c6c
+
+    .. note::
+
+       Pay attention that OCTET STRING can be encoded both in primitive
+       and constructed forms. Decoder always checks constructed form tag
+       additionally to specified primitive one. If BER decoding is
+       :ref:`not enabled <bered_ctx>`, then decoder will fail, because
+       of DER restrictions.
     """
     __slots__ = ("tag_constructed", "_bound_min", "_bound_max", "defined")
     tag_default = tag_encode(4)
@@ -2507,7 +2566,8 @@ class OctetString(Obj):
         if t == self.tag_constructed:
             if not ctx.get("bered", False):
                 raise DecodeError(
-                    msg="unallowed BER constructed encoding",
+                    "unallowed BER constructed encoding",
+                    klass=self.__class__,
                     decode_path=decode_path,
                     offset=offset,
                 )
@@ -2526,20 +2586,13 @@ class OctetString(Obj):
                     decode_path=decode_path,
                     offset=offset,
                 )
-            if l > 0 and l > len(v):
+            if l > len(v):
                 raise NotEnoughData(
                     "encoded length is longer than data",
                     klass=self.__class__,
                     decode_path=decode_path,
                     offset=offset,
                 )
-            if not lenindef and l == 0:
-                raise NotEnoughData(
-                    "zero length",
-                    klass=self.__class__,
-                    decode_path=decode_path,
-                    offset=offset,
-                )
             chunks = []
             sub_offset = offset + tlen + llen
             vlen = 0
@@ -2552,8 +2605,9 @@ class OctetString(Obj):
                         break
                     if vlen > l:
                         raise DecodeError(
-                            msg="chunk out of bounds",
-                            decode_path=len(chunks) - 1,
+                            "chunk out of bounds",
+                            klass=self.__class__,
+                            decode_path=decode_path + (str(len(chunks) - 1),),
                             offset=chunks[-1].offset,
                         )
                 sub_decode_path = decode_path + (str(len(chunks)),)
@@ -2567,7 +2621,8 @@ class OctetString(Obj):
                     )
                 except TagMismatch:
                     raise DecodeError(
-                        msg="expected OctetString encoded chunk",
+                        "expected OctetString encoded chunk",
+                        klass=self.__class__,
                         decode_path=sub_decode_path,
                         offset=sub_offset,
                     )
@@ -2575,12 +2630,6 @@ class OctetString(Obj):
                 sub_offset += chunk.tlvlen
                 vlen += chunk.tlvlen
                 v = v_tail
-            if len(chunks) == 0:
-                raise DecodeError(
-                    msg="no chunks",
-                    decode_path=decode_path,
-                    offset=offset,
-                )
             try:
                 obj = self.__class__(
                     value=b"".join(bytes(chunk) for chunk in chunks),
@@ -3299,6 +3348,10 @@ class UTF8String(CommonString):
 
 
 class NumericString(CommonString):
+    """Numeric string
+
+    Its value is properly sanitized: only ASCII digits can be stored.
+    """
     __slots__ = ()
     tag_default = tag_encode(18)
     encoding = "ascii"
@@ -3964,29 +4017,27 @@ class Any(Obj):
             llen, vlen, v = 1, 0, lv[1:]
             sub_offset = offset + tlen + llen
             chunk_i = 0
-            while True:
-                if v[:EOC_LEN].tobytes() == EOC:
-                    tlvlen = tlen + llen + vlen + EOC_LEN
-                    obj = self.__class__(
-                        value=tlv[:tlvlen].tobytes(),
-                        expl=self._expl,
-                        optional=self.optional,
-                        _decoded=(offset, 0, tlvlen),
-                    )
-                    obj.lenindef = True
-                    obj.tag = t
-                    return obj, v[EOC_LEN:]
-                else:
-                    chunk, v = Any().decode(
-                        v,
-                        offset=sub_offset,
-                        decode_path=decode_path + (str(chunk_i),),
-                        leavemm=True,
-                        ctx=ctx,
-                    )
-                    vlen += chunk.tlvlen
-                    sub_offset += chunk.tlvlen
-                    chunk_i += 1
+            while v[:EOC_LEN].tobytes() != EOC:
+                chunk, v = Any().decode(
+                    v,
+                    offset=sub_offset,
+                    decode_path=decode_path + (str(chunk_i),),
+                    leavemm=True,
+                    ctx=ctx,
+                )
+                vlen += chunk.tlvlen
+                sub_offset += chunk.tlvlen
+                chunk_i += 1
+            tlvlen = tlen + llen + vlen + EOC_LEN
+            obj = self.__class__(
+                value=tlv[:tlvlen].tobytes(),
+                expl=self._expl,
+                optional=self.optional,
+                _decoded=(offset, 0, tlvlen),
+            )
+            obj.lenindef = True
+            obj.tag = t
+            return obj, v[EOC_LEN:]
         except DecodeError as err:
             raise err.__class__(
                 msg=err.msg,
@@ -4640,15 +4691,24 @@ class Set(Sequence):
             _decoded=(offset, llen, vlen + (EOC_LEN if lenindef else 0)),
         )
         obj._value = values
+        if lenindef:
+            if v[:EOC_LEN].tobytes() != EOC:
+                raise DecodeError(
+                    "no EOC",
+                    klass=self.__class__,
+                    decode_path=decode_path,
+                    offset=offset,
+                )
+            tail = v[EOC_LEN:]
+            obj.lenindef = True
         if not obj.ready:
             raise DecodeError(
-                msg="not all values are ready",
+                "not all values are ready",
                 klass=self.__class__,
                 decode_path=decode_path,
                 offset=offset,
             )
-        obj.lenindef = lenindef
-        return obj, (v[EOC_LEN:] if lenindef else tail)
+        return obj, tail
 
 
 class SequenceOf(Obj):
@@ -4905,10 +4965,19 @@ class SequenceOf(Obj):
             expl=self._expl,
             default=self.default,
             optional=self.optional,
-            _decoded=(offset, llen, vlen),
+            _decoded=(offset, llen, vlen + (EOC_LEN if lenindef else 0)),
         )
-        obj.lenindef = lenindef
-        return obj, (v[EOC_LEN:] if lenindef else tail)
+        if lenindef:
+            if v[:EOC_LEN].tobytes() != EOC:
+                raise DecodeError(
+                    "no EOC",
+                    klass=self.__class__,
+                    decode_path=decode_path,
+                    offset=offset,
+                )
+            obj.lenindef = True
+            tail = v[EOC_LEN:]
+        return obj, tail
 
     def __repr__(self):
         return "%s[%s]" % (
@@ -5018,7 +5087,7 @@ def generic_decoder():  # pragma: no cover
 
 def main():  # pragma: no cover
     import argparse
-    parser = argparse.ArgumentParser(description="PyDERASN ASN.1 DER decoder")
+    parser = argparse.ArgumentParser(description="PyDERASN ASN.1 BER/DER decoder")
     parser.add_argument(
         "--skip",
         type=int,