#!/usr/bin/env python
# coding: utf-8
# cython: language_level=3
-# PyDERASN -- Python ASN.1 DER/BER codec with abstract structures
+# pylint: disable=line-too-long,superfluous-parens,protected-access,too-many-lines
+# pylint: disable=too-many-return-statements,too-many-branches,too-many-statements
+# PyDERASN -- Python ASN.1 DER/CER/BER codec with abstract structures
# Copyright (C) 2017-2020 Sergey Matveev <stargrave@stargrave.org>
#
# This program is free software: you can redistribute it and/or modify
* :ref:`allow_unordered_set <allow_unordered_set_ctx>`
* :ref:`bered <bered_ctx>`
* :ref:`defines_by_path <defines_by_path_ctx>`
+* :ref:`evgen_mode_upto <evgen_mode_upto_ctx>`
.. _pprinting:
Sometimes you either can not or do not want to explicitly set *defines*
in the schema. You can dynamically apply those definitions when calling
-``.decode()`` method.
+:py:meth:`pyderasn.Obj.decode` method.
Specify ``defines_by_path`` key in the :ref:`decode context <ctx>`. Its
value must be sequence of following tuples::
BER encoding
------------
-By default PyDERASN accepts only DER encoded data. It always encodes to
-DER. But you can optionally enable BER decoding with setting ``bered``
-:ref:`context <ctx>` argument to True. Indefinite lengths and
+By default PyDERASN accepts only DER encoded data. By default it encodes
+to DER. But you can optionally enable BER decoding with setting
+``bered`` :ref:`context <ctx>` argument to True. Indefinite lengths and
constructed primitive types should be parsed successfully.
* If object is encoded in BER form (not the DER one), then ``ber_encoded``
This option should be used only for skipping some decode errors, just
to see the decoded structure somehow.
+.. _streaming:
+
+Streaming and dealing with huge structures
+------------------------------------------
+
+.. _evgen_mode:
+
+evgen mode
+__________
+
+ASN.1 structures can be huge, they can hold millions of objects inside
+(for example Certificate Revocation Lists (CRL), holding revocation
+state for every previously issued X.509 certificate). CACert.org's 8 MiB
+CRL file takes more than half a gigabyte of memory to hold the decoded
+structure.
+
+If you just simply want to check the signature over the ``tbsCertList``,
+you can create specialized schema with that field represented as
+OctetString for example::
+
+ class TBSCertListFast(Sequence):
+ schema = (
+ [...]
+ ("revokedCertificates", OctetString(
+ impl=SequenceOf.tag_default,
+ optional=True,
+ )),
+ [...]
+ )
+
+This allows you to quickly decode a few fields and check the signature
+over the ``tbsCertList`` bytes.
+
+But how can you get all certificate's serial number from it, after you
+trust that CRL after signature validation? You can use so called
+``evgen`` (event generation) mode, to catch the events/facts of some
+successful object decoding. Let's use command line capabilities::
+
+ $ python -m pyderasn --schema tests.test_crl:CertificateList --evgen revoke.crl
+ 10 [1,1, 1] . . version: Version INTEGER v2 (01) OPTIONAL
+ 15 [1,1, 9] . . . algorithm: OBJECT IDENTIFIER 1.2.840.113549.1.1.13
+ 26 [0,0, 2] . . . parameters: [UNIV 5] ANY OPTIONAL
+ 13 [1,1, 13] . . signature: AlgorithmIdentifier SEQUENCE
+ 34 [1,1, 3] . . . . . . type: AttributeType OBJECT IDENTIFIER 2.5.4.10
+ 39 [0,0, 9] . . . . . . value: [UNIV 19] AttributeValue ANY
+ 32 [1,1, 14] . . . . . 0: AttributeTypeAndValue SEQUENCE
+ 30 [1,1, 16] . . . . 0: RelativeDistinguishedName SET OF
+ [...]
+ 188 [1,1, 1] . . . . userCertificate: CertificateSerialNumber INTEGER 17 (11)
+ 191 [1,1, 13] . . . . . utcTime: UTCTime UTCTime 2003-04-01T14:25:08
+ 191 [0,0, 15] . . . . revocationDate: Time CHOICE utcTime
+ 191 [1,1, 13] . . . . . utcTime: UTCTime UTCTime 2003-04-01T14:25:08
+ 186 [1,1, 18] . . . 0: RevokedCertificate SEQUENCE
+ 208 [1,1, 1] . . . . userCertificate: CertificateSerialNumber INTEGER 20 (14)
+ 211 [1,1, 13] . . . . . utcTime: UTCTime UTCTime 2002-10-01T02:18:01
+ 211 [0,0, 15] . . . . revocationDate: Time CHOICE utcTime
+ 211 [1,1, 13] . . . . . utcTime: UTCTime UTCTime 2002-10-01T02:18:01
+ 206 [1,1, 18] . . . 1: RevokedCertificate SEQUENCE
+ [...]
+ 9144992 [0,0, 15] . . . . revocationDate: Time CHOICE utcTime
+ 9144992 [1,1, 13] . . . . . utcTime: UTCTime UTCTime 2020-02-08T07:25:06
+ 9144985 [1,1, 20] . . . 415755: RevokedCertificate SEQUENCE
+ 181 [1,4,9144821] . . revokedCertificates: RevokedCertificates SEQUENCE OF OPTIONAL
+ 5 [1,4,9144997] . tbsCertList: TBSCertList SEQUENCE
+ 9145009 [1,1, 9] . . algorithm: OBJECT IDENTIFIER 1.2.840.113549.1.1.13
+ 9145020 [0,0, 2] . . parameters: [UNIV 5] ANY OPTIONAL
+ 9145007 [1,1, 13] . signatureAlgorithm: AlgorithmIdentifier SEQUENCE
+ 9145022 [1,3, 513] . signatureValue: BIT STRING 4096 bits
+ 0 [1,4,9145534] CertificateList SEQUENCE
+
+Here we see how decoder works: it decodes SEQUENCE's tag, length, then
+decodes underlying values. It can not tell if SEQUENCE is decoded, so
+the event of the upper level SEQUENCE is the last one we see.
+``version`` field is just a single INTEGER -- it is decoded and event is
+fired immediately. Then we see that ``algorithm`` and ``parameters``
+fields are decoded and only after them the ``signature`` SEQUENCE is
+fired as a successfully decoded. There are 4 events for each revoked
+certificate entry in that CRL: ``userCertificate`` serial number,
+``utcTime`` of ``revocationDate`` CHOICE, ``RevokedCertificate`` itself
+as a one of entity in ``revokedCertificates`` SEQUENCE OF.
+
+We can do that in our ordinary Python code and understand where we are
+by looking at deterministically generated decode paths (do not forget
+about useful ``--print-decode-path`` CLI option). We must use
+:py:meth:`pyderasn.Obj.decode_evgen` method, instead of ordinary
+:py:meth:`pyderasn.Obj.decode`. It is generator yielding ``(decode_path,
+obj, tail)`` tuples::
+
+ for decode_path, obj, _ in CertificateList().decode_evgen(crl_raw):
+ if (
+ len(decode_path) == 4 and
+ decode_path[:2] == ("tbsCertList", "revokedCertificates"),
+ decode_path[3] == "userCertificate"
+ ):
+ print("serial number:", int(obj))
+
+Virtually it does not take any memory except at least needed for single
+object storage. You can easily use that mode to determine required
+object ``.offset`` and ``.*len`` to be able to decode it separately, or
+maybe verify signature upon it just by taking bytes by ``.offset`` and
+``.tlvlen``.
+
+.. _evgen_mode_upto_ctx:
+
+evgen_mode_upto
+_______________
+
+There is full ability to get any kind of data from the CRL in the
+example above. However it is not too convenient to get the whole
+``RevokedCertificate`` structure, that is pretty lightweight and one may
+do not want to disassemble it. You can use ``evgen_mode_upto``
+:ref:`ctx <ctx>` option that semantically equals to
+:ref:`defines_by_path <defines_by_path_ctx>` -- list of decode paths
+mapped to any non-None value. If specified decode path is met, then any
+subsequent objects won't be decoded in evgen mode. That allows us to
+parse the CRL above with fully assembled ``RevokedCertificate``::
+
+ for decode_path, obj, _ in CertificateList().decode_evgen(
+ crl_raw,
+ ctx={"evgen_mode_upto": (
+ (("tbsCertList", "revokedCertificates", any), True),
+ )},
+ ):
+ if (
+ len(decode_path) == 3 and
+ decode_path[:2] == ("tbsCertList", "revokedCertificates"),
+ ):
+ print("serial number:", int(obj["userCertificate"]))
+
+.. note::
+
+ SEQUENCE/SET values with DEFAULT specified are automatically decoded
+ without evgen mode.
+
+.. _mmap:
+
+mmap-ed file
+____________
+
+POSIX compliant systems have ``mmap`` syscall, giving ability to work
+the memory mapped file. You can deal with the file like it was an
+ordinary binary string, allowing you not to load it to the memory first.
+Also you can use them as an input for OCTET STRING, taking no Python
+memory for their storage.
+
+There is convenient :py:func:`pyderasn.file_mmaped` function that
+creates read-only memoryview on the file contents::
+
+ with open("huge", "rb") as fd:
+ raw = file_mmaped(fd)
+ obj = Something.decode(raw)
+
+.. warning::
+
+ mmap-ed files in Python2.7 does not implement buffer protocol, so
+ memoryview won't work on them.
+
+.. warning::
+
+ mmap maps the **whole** file. So it plays no role if you seek-ed it
+ before. Take the slice of the resulting memoryview with required
+ offset instead.
+
+.. note::
+
+ If you use ZFS as underlying storage, then pay attention that
+ currently most platforms does not deal good with ZFS ARC and ordinary
+ page cache used for mmaps. It can take twice the necessary size in
+ the memory: both in page cache and ZFS ARC.
+
+CER encoding
+____________
+
+We can parse any kind of data now, but how can we produce files
+streamingly, without storing their encoded representation in memory?
+SEQUENCE by default encodes in memory all its values, joins them in huge
+binary string, just to know the exact size of SEQUENCE's value for
+encoding it in TLV. DER requires you to know all exact sizes of the
+objects.
+
+You can use CER encoding mode, that slightly differs from the DER, but
+does not require exact sizes knowledge, allowing streaming encoding
+directly to some writer/buffer. Just use
+:py:meth:`pyderasn.Obj.encode_cer` method, providing the writer where
+encoded data will flow::
+
+ opener = io.open if PY2 else open
+ with opener("result", "wb") as fd:
+ obj.encode_cer(fd.write)
+
+::
+
+ buf = io.BytesIO()
+ obj.encode_cer(buf.write)
+
+If you do not want to create in-memory buffer every time, then you can
+use :py:func:`pyderasn.encode_cer` function::
+
+ data = encode_cer(obj)
+
+Remember that CER is **not valid** DER in most cases, so you **have to**
+use :ref:`bered <bered_ctx>` :ref:`ctx <ctx>` option during its
+decoding. Also currently there is **no** validation that provided CER is
+valid one -- you are sure that it has only valid BER encoding.
+
+.. warning::
+
+ SET OF values can not be streamingly encoded, because they are
+ required to be sorted byte-by-byte. Big SET OF values still will take
+ much memory. Use neither SET nor SET OF values, as modern ASN.1
+ also recommends too.
+
+Do not forget about using :ref:`mmap-ed <mmap>` memoryviews for your
+OCTET STRINGs! They will be streamingly copied from underlying file to
+the buffer using 1 KB chunks.
+
+Some structures require that some of the elements have to be forcefully
+DER encoded. For example ``SignedData`` CMS requires you to encode
+``SignedAttributes`` and X.509 certificates in DER form, allowing you to
+encode everything else in BER. You can tell any of the structures to be
+forcefully encoded in DER during CER encoding, by specifying
+``der_forced=True`` attribute::
+
+ class Certificate(Sequence):
+ schema = (...)
+ der_forced = True
+
+ class SignedAttributes(SetOf):
+ schema = Attribute()
+ bounds = (1, 32)
+ der_forced = True
+
+.. _agg_octet_string:
+
+agg_octet_string
+________________
+
+In most cases, huge quantity of binary data is stored as OCTET STRING.
+CER encoding splits it on 1 KB chunks. BER allows splitting on various
+levels of chunks inclusion::
+
+ SOME STRING[CONSTRUCTED]
+ OCTET STRING[CONSTRUCTED]
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+ OCTET STRING[CONSTRUCTED]
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+ OCTET STRING[CONSTRUCTED]
+ OCTET STRING[CONSTRUCTED]
+ OCTET STRING[PRIMITIVE]
+ DATA CHUNK
+
+You can not just take the offset and some ``.vlen`` of the STRING and
+treat it as the payload. If you decode it without
+:ref:`evgen mode <evgen_mode>`, then it will be automatically aggregated
+and ``bytes()`` will give the whole payload contents.
+
+You are forced to use :ref:`evgen mode <evgen_mode>` for decoding for
+small memory footprint. There is convenient
+:py:func:`pyderasn.agg_octet_string` helper for reconstructing the
+payload. Let's assume you have got BER/CER encoded ``ContentInfo`` with
+huge ``SignedData`` and ``EncapsulatedContentInfo``. Let's calculate the
+SHA512 digest of its ``eContent``::
+
+ fd = open("data.p7m", "rb")
+ raw = file_mmaped(fd)
+ ctx = {"bered": True}
+ for decode_path, obj, _ in ContentInfo().decode_evgen(raw, ctx=ctx):
+ if decode_path == ("content",):
+ content = obj
+ break
+ else:
+ raise ValueError("no content found")
+ hasher_state = sha512()
+ def hasher(data):
+ hasher_state.update(data)
+ return len(data)
+ evgens = SignedData().decode_evgen(
+ raw[content.offset:],
+ offset=content.offset,
+ ctx=ctx,
+ )
+ agg_octet_string(evgens, ("encapContentInfo", "eContent"), raw, hasher)
+ fd.close()
+ digest = hasher_state.digest()
+
+Simply replace ``hasher`` with some writeable file's ``fd.write`` to
+copy the payload (without BER/CER encoding interleaved overhead) in it.
+Virtually it won't take memory more than for keeping small structures
+and 1 KB binary chunks.
+
+.. _seqof-iterators:
+
+SEQUENCE OF iterators
+_____________________
+
+You can use iterators as a value in :py:class:`pyderasn.SequenceOf`
+classes. The only difference with providing the full list of objects, is
+that type and bounds checking is done during encoding process. Also
+sequence's value will be emptied after encoding, forcing you to set its
+value again.
+
+This is very useful when you have to create some huge objects, like
+CRLs, with thousands and millions of entities inside. You can write the
+generator taking necessary data from the database and giving the
+``RevokedCertificate`` objects. Only binary representation of that
+objects will take memory during DER encoding.
+
+2-pass DER encoding
+-------------------
+
+There is ability to do 2-pass encoding to DER, writing results directly
+to specified writer (buffer, file, whatever). It could be 1.5+ times
+slower than ordinary encoding, but it takes little memory for 1st pass
+state storing. For example, 1st pass state for CACert.org's CRL with
+~416K of certificate entries takes nearly 3.5 MB of memory.
+``SignedData`` with several gigabyte ``EncapsulatedContentInfo`` takes
+nearly 0.5 KB of memory.
+
+If you use :ref:`mmap-ed <mmap>` memoryviews, :ref:`SEQUENCE OF
+iterators <seqof-iterators>` and write directly to opened file, then
+there is very small memory footprint.
+
+1st pass traverses through all the objects of the structure and returns
+the size of DER encoded structure, together with 1st pass state object.
+That state contains precalculated lengths for various objects inside the
+structure.
+
+::
+
+ fulllen, state = obj.encode1st()
+
+2nd pass takes the writer and 1st pass state. It traverses through all
+the objects again, but writes their encoded representation to the writer.
+
+::
+
+ opener = io.open if PY2 else open
+ with opener("result", "wb") as fd:
+ obj.encode2nd(fd.write, iter(state))
+
+.. warning::
+
+ You **MUST NOT** use 1st pass state if anything is changed in the
+ objects. It is intended to be used immediately after 1st pass is
+ done!
+
+If you use :ref:`SEQUENCE OF iterators <seqof-iterators>`, then you
+have to reinitialize the values after the 1st pass. And you **have to**
+be sure that the iterator gives exactly the same values as previously.
+Yes, you have to run your iterator twice -- because this is two pass
+encoding mode.
+
+If you want to encode to the memory, then you can use convenient
+:py:func:`pyderasn.encode2pass` helper.
+
Base Obj
--------
.. autoclass:: pyderasn.Obj
-------
.. autofunction:: pyderasn.abs_decode_path
+.. autofunction:: pyderasn.agg_octet_string
.. autofunction:: pyderasn.colonize_hex
+.. autofunction:: pyderasn.encode2pass
.. autofunction:: pyderasn.encode_cer
.. autofunction:: pyderasn.file_mmaped
.. autofunction:: pyderasn.hexenc
. . 05:00
"""
+from array import array
from codecs import getdecoder
from codecs import getencoder
from collections import namedtuple
from operator import attrgetter
from string import ascii_letters
from string import digits
+from sys import maxsize as sys_maxsize
from sys import version_info
from unicodedata import category as unicat
def colored(what, *args, **kwargs):
return what
-__version__ = "7.0"
+__version__ = "7.4"
__all__ = (
"agg_octet_string",
"Boolean",
"BoundsError",
"Choice",
+ "colonize_hex",
"DecodeError",
"DecodePathDefBy",
+ "encode2pass",
"encode_cer",
"Enumerated",
"ExceedingData",
SET01 = frozenset("01")
DECIMALS = frozenset(digits)
DECIMAL_SIGNS = ".,"
+NEXT_ATTR_NAME = "next" if PY2 else "__next__"
def file_mmaped(fd):
return l, 1 + octets_num, data[1 + octets_num:]
+LEN0 = len_encode(0)
+LEN1 = len_encode(1)
LEN1K = len_encode(1000)
+def len_size(l):
+ """How many bytes length field will take
+ """
+ if l < 128:
+ return 1
+ if l < 256: # 1 << 8
+ return 2
+ if l < 65536: # 1 << 16
+ return 3
+ if l < 16777216: # 1 << 24
+ return 4
+ if l < 4294967296: # 1 << 32
+ return 5
+ if l < 1099511627776: # 1 << 40
+ return 6
+ if l < 281474976710656: # 1 << 48
+ return 7
+ if l < 72057594037927936: # 1 << 56
+ return 8
+ raise OverflowError("too big length")
+
+
def write_full(writer, data):
"""Fully write provided data
- BytesIO does not guarantee that the whole data will be written at once.
+ :param writer: must comply with ``io.RawIOBase.write`` behaviour
+
+ BytesIO does not guarantee that the whole data will be written at
+ once. That function write everything provided, raising an error if
+ ``writer`` returns None.
"""
data = memoryview(data)
written = 0
written += n
+# If it is 64-bit system, then use compact 64-bit array of unsigned
+# longs. Use an ordinary list with universal integers otherwise, that
+# is slower.
+if sys_maxsize > 2 ** 32:
+ def state_2pass_new():
+ return array("L")
+else:
+ def state_2pass_new():
+ return []
+
+
########################################################################
# Base class
########################################################################
@property
def tlen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return len(self.tag)
@property
def tlvlen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self.tlen + self.llen + self.vlen
def _encode(self): # pragma: no cover
raise NotImplementedError()
+ def _encode_cer(self, writer):
+ write_full(writer, self._encode())
+
def _decode(self, tlv, offset, decode_path, ctx, tag_only, evgen_mode): # pragma: no cover
yield NotImplemented
+ def _encode1st(self, state):
+ raise NotImplementedError()
+
+ def _encode2nd(self, writer, state_iter):
+ raise NotImplementedError()
+
def encode(self):
- """Encode the structure
+ """DER encode the structure
:returns: DER representation
"""
return raw
return b"".join((self._expl, len_encode(len(raw)), raw))
+ def encode1st(self, state=None):
+ """Do the 1st pass of 2-pass encoding
+
+ :rtype: (int, array("L"))
+ :returns: full length of encoded data and precalculated various
+ objects lengths
+ """
+ if state is None:
+ state = state_2pass_new()
+ if self._expl is None:
+ return self._encode1st(state)
+ state.append(0)
+ idx = len(state) - 1
+ vlen, _ = self._encode1st(state)
+ state[idx] = vlen
+ fulllen = len(self._expl) + len_size(vlen) + vlen
+ return fulllen, state
+
+ def encode2nd(self, writer, state_iter):
+ """Do the 2nd pass of 2-pass encoding
+
+ :param writer: must comply with ``io.RawIOBase.write`` behaviour
+ :param state_iter: iterator over the 1st pass state (``iter(state)``)
+ """
+ if self._expl is None:
+ self._encode2nd(writer, state_iter)
+ else:
+ write_full(writer, self._expl + len_encode(next(state_iter)))
+ self._encode2nd(writer, state_iter)
+
def encode_cer(self, writer):
+ """CER encode the structure to specified writer
+
+ :param writer: must comply with ``io.RawIOBase.write``
+ behaviour. It takes slice to be written and
+ returns number of bytes processed. If it returns
+ None, then exception will be raised
+ """
if self._expl is not None:
write_full(writer, self._expl + LENINDEF)
if getattr(self, "der_forced", False):
if self._expl is not None:
write_full(writer, EOC)
- def _encode_cer(self, writer):
- write_full(writer, self._encode())
-
def hexencode(self):
"""Do hexadecimal encoded :py:meth:`pyderasn.Obj.encode`
"""
tag_only=False,
_ctx_immutable=True,
):
+ """Decode the data
+
+ :param data: either binary or memoryview
+ :param int offset: initial data's offset
+ :param bool leavemm: do we need to leave memoryview of remaining
+ data as is, or convert it to bytes otherwise
+ :param decode_path: current decode path (tuples of strings,
+ possibly with DecodePathDefBy) with will be
+ the root for all underlying objects
+ :param ctx: optional :ref:`context <ctx>` governing decoding process
+ :param bool tag_only: decode only the tag, without length and
+ contents (used only in Choice and Set
+ structures, trying to determine if tag satisfies
+ the schema)
+ :param bool _ctx_immutable: do we need to ``copy.copy()`` ``ctx``
+ before using it?
+ :returns: (Obj, remaining data)
+
+ .. seealso:: :ref:`decoding`
+ """
result = next(self.decode_evgen(
data,
offset,
_ctx_immutable=True,
_evgen_mode=True,
):
- """Decode the data
-
- :param data: either binary or memoryview
- :param int offset: initial data's offset
- :param bool leavemm: do we need to leave memoryview of remaining
- data as is, or convert it to bytes otherwise
- :param ctx: optional :ref:`context <ctx>` governing decoding process
- :param tag_only: decode only the tag, without length and contents
- (used only in Choice and Set structures, trying to
- determine if tag satisfies the schema)
- :param _ctx_immutable: do we need to ``copy.copy()`` ``ctx``
- before using it?
- :returns: (Obj, remaining data)
+ """Decode with evgen mode on
- .. seealso:: :ref:`decoding`
+ That method is identical to :py:meth:`pyderasn.Obj.decode`, but
+ it returns the generator producing ``(decode_path, obj, tail)``
+ values.
+ .. seealso:: :ref:`evgen mode <evgen_mode>`.
"""
if ctx is None:
ctx = {}
@property
def expled(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self._expl is not None
@property
def expl_tag(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self._expl
@property
def expl_tlen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return len(self._expl)
@property
def expl_llen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
if self.expl_lenindef:
return 1
@property
def expl_offset(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self.offset - self.expl_tlen - self.expl_llen
@property
def expl_vlen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self.tlvlen
@property
def expl_tlvlen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self.expl_tlen + self.expl_llen + self.expl_vlen
@property
def fulloffset(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self.expl_offset if self.expled else self.offset
@property
def fulllen(self):
- """See :ref:`decoding`
+ """.. seealso:: :ref:`decoding`
"""
return self.expl_tlvlen if self.expled else self.tlvlen
def encode_cer(obj):
- """Encode to CER in memory
+ """Encode to CER in memory buffer
+
+ :returns bytes: memory buffer contents
"""
buf = BytesIO()
obj.encode_cer(buf.write)
return buf.getvalue()
+def encode2pass(obj):
+ """Encode (2-pass mode) to DER in memory buffer
+
+ :returns bytes: memory buffer contents
+ """
+ buf = BytesIO()
+ _, state = obj.encode1st()
+ obj.encode2nd(buf.write, iter(state))
+ return buf.getvalue()
+
+
class DecodePathDefBy(object):
"""DEFINED BY representation inside decode path
"""
def _encode(self):
self._assert_ready()
- return b"".join((
- self.tag,
- len_encode(1),
- (b"\xFF" if self._value else b"\x00"),
- ))
+ return b"".join((self.tag, LEN1, (b"\xFF" if self._value else b"\x00")))
+
+ def _encode1st(self, state):
+ return len(self.tag) + 2, state
+
+ def _encode2nd(self, writer, state_iter):
+ self._assert_ready()
+ write_full(writer, self._encode())
def _decode(self, tlv, offset, decode_path, ctx, tag_only, evgen_mode):
try:
def __hash__(self):
self._assert_ready()
- return hash(
- self.tag +
- bytes(self._expl or b"") +
+ return hash(b"".join((
+ self.tag,
+ bytes(self._expl or b""),
str(self._value).encode("ascii"),
- )
+ )))
def __eq__(self, their):
if isinstance(their, integer_types):
_specs=self.specs,
)
- def _encode(self):
+ def _encode_payload(self):
self._assert_ready()
value = self._value
if PY2:
bytes_len += 1
else:
break
+ return octets
return b"".join((self.tag, len_encode(len(octets)), octets))
+ def _encode(self):
+ octets = self._encode_payload()
+ return b"".join((self.tag, len_encode(len(octets)), octets))
+
+ def _encode1st(self, state):
+ l = len(self._encode_payload())
+ return len(self.tag) + len_size(l) + l, state
+
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self._encode())
+
def _decode(self, tlv, offset, decode_path, ctx, tag_only, evgen_mode):
try:
t, _, lv = tag_strip(tlv)
octets,
))
+ def _encode1st(self, state):
+ self._assert_ready()
+ _, octets = self._value
+ l = len(octets) + 1
+ return len(self.tag) + len_size(l) + l, state
+
+ def _encode2nd(self, writer, state_iter):
+ bit_len, octets = self._value
+ write_full(writer, b"".join((
+ self.tag,
+ len_encode(len(octets) + 1),
+ int2byte((8 - bit_len % 8) % 8),
+ )))
+ write_full(writer, octets)
+
def _encode_cer(self, writer):
bit_len, octets = self._value
if len(octets) + 1 <= 1000:
self._value,
))
+ def _encode1st(self, state):
+ self._assert_ready()
+ l = len(self._value)
+ return len(self.tag) + len_size(l) + l, state
+
+ def _encode2nd(self, writer, state_iter):
+ value = self._value
+ write_full(writer, self.tag + len_encode(len(value)))
+ write_full(writer, value)
+
def _encode_cer(self, writer):
octets = self._value
if len(octets) <= 1000:
:param evgens: iterator of generated events
:param decode_path: points to the string we want to decode
:param raw: slicebable (memoryview, bytearray, etc) with
- the data evgens are generated one
+ the data evgens are generated on
:param writer: buffer.write where string is going to be saved
+ :param writer: where string is going to be saved. Must comply
+ with ``io.RawIOBase.write`` behaviour
+
+ .. seealso:: :ref:`agg_octet_string`
"""
decode_path_len = len(decode_path)
for dp, obj, _ in evgens:
)
def _encode(self):
- return self.tag + len_encode(0)
+ return self.tag + LEN0
+
+ def _encode1st(self, state):
+ return len(self.tag) + 1, state
+
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self.tag + LEN0)
def _decode(self, tlv, offset, decode_path, ctx, tag_only, evgen_mode):
try:
tuple element is ``{OID: pyderasn.Obj()}``
dictionary, mapping between current OID value
and structure applied to defined field.
- :ref:`Read about DEFINED BY <definedby>`
+
+ .. seealso:: :ref:`definedby`
+
:param bytes impl: override default tag with ``IMPLICIT`` one
:param bytes expl: override default tag with ``EXPLICIT`` one
:param default: set default value. Type same as in ``value``
def __add__(self, their):
if their.__class__ == tuple:
- return self.__class__(self._value + their)
+ return self.__class__(self._value + array("L", their))
if isinstance(their, self.__class__):
return self.__class__(self._value + their._value)
raise InvalidValueType((self.__class__, tuple))
return value._value
if isinstance(value, string_types):
try:
- value = tuple(pureint(arc) for arc in value.split("."))
+ value = array("L", (pureint(arc) for arc in value.split(".")))
except ValueError:
raise InvalidOID("unacceptable arcs values")
if value.__class__ == tuple:
+ try:
+ value = array("L", value)
+ except OverflowError as err:
+ raise InvalidOID(repr(err))
+ if value.__class__ is array:
if len(value) < 2:
raise InvalidOID("less than 2 arcs")
first_arc = value[0]
def __hash__(self):
self._assert_ready()
- return hash(
- self.tag +
- bytes(self._expl or b"") +
+ return hash(b"".join((
+ self.tag,
+ bytes(self._expl or b""),
str(self._value).encode("ascii"),
- )
+ )))
def __eq__(self, their):
if their.__class__ == tuple:
- return self._value == their
+ return self._value == array("L", their)
if not issubclass(their.__class__, ObjectIdentifier):
return False
return (
optional=self.optional if optional is None else optional,
)
- def _encode(self):
+ def _encode_octets(self):
self._assert_ready()
value = self._value
first_value = value[1]
octets = [zero_ended_encode(first_value)]
for arc in value[2:]:
octets.append(zero_ended_encode(arc))
- v = b"".join(octets)
+ return b"".join(octets)
+
+ def _encode(self):
+ v = self._encode_octets()
return b"".join((self.tag, len_encode(len(v)), v))
+ def _encode1st(self, state):
+ l = len(self._encode_octets())
+ return len(self.tag) + len_size(l) + l, state
+
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self._encode())
+
def _decode(self, tlv, offset, decode_path, ctx, tag_only, evgen_mode):
try:
t, _, lv = tag_strip(tlv)
offset=offset,
)
v, tail = v[:l], v[l:]
- arcs = []
+ arcs = array("L")
ber_encoded = False
while len(v) > 0:
i = 0
if ctx.get("bered", False):
ber_encoded = True
else:
- raise DecodeError("non normalized arc encoding")
+ raise DecodeError(
+ "non normalized arc encoding",
+ klass=self.__class__,
+ decode_path=decode_path,
+ offset=offset,
+ )
arc = (arc << 7) | (octet & 0x7F)
if octet & 0x80 == 0:
- arcs.append(arc)
+ try:
+ arcs.append(arc)
+ except OverflowError:
+ raise DecodeError(
+ "too huge value for local unsigned long",
+ klass=self.__class__,
+ decode_path=decode_path,
+ offset=offset,
+ )
v = v[i + 1:]
break
i += 1
first_arc = 2
second_arc -= 80
obj = self.__class__(
- value=tuple([first_arc, second_arc] + arcs[1:]),
+ value=array("L", (first_arc, second_arc)) + arcs[1:],
impl=self.tag,
expl=self._expl,
default=self.default,
LEN_YYMMDDHHMMSSZ = len("YYMMDDHHMMSSZ")
+LEN_LEN_YYMMDDHHMMSSZ = len_encode(LEN_YYMMDDHHMMSSZ)
+LEN_YYMMDDHHMMSSZ_WITH_LEN = len(LEN_LEN_YYMMDDHHMMSSZ) + LEN_YYMMDDHHMMSSZ
LEN_YYYYMMDDHHMMSSDMZ = len("YYYYMMDDHHMMSSDMZ")
LEN_YYYYMMDDHHMMSSZ = len("YYYYMMDDHHMMSSZ")
+LEN_LEN_YYYYMMDDHHMMSSZ = len_encode(LEN_YYYYMMDDHHMMSSZ)
class VisibleString(CommonString):
if self.ber_encoded:
value += " (%s)" % self.ber_raw
return value
+ return None
def __unicode__(self):
if self.ready:
def _encode(self):
self._assert_ready()
- value = self._encode_time()
- return b"".join((self.tag, len_encode(len(value)), value))
+ return b"".join((self.tag, LEN_LEN_YYMMDDHHMMSSZ, self._encode_time()))
+
+ def _encode1st(self, state):
+ return len(self.tag) + LEN_YYMMDDHHMMSSZ_WITH_LEN, state
+
+ def _encode2nd(self, writer, state_iter):
+ self._assert_ready()
+ write_full(writer, self._encode())
def _encode_cer(self, writer):
write_full(writer, self._encode())
encoded += (".%06d" % value.microsecond).rstrip("0")
return (encoded + "Z").encode("ascii")
+ def _encode(self):
+ self._assert_ready()
+ value = self._value
+ if value.microsecond > 0:
+ encoded = self._encode_time()
+ return b"".join((self.tag, len_encode(len(encoded)), encoded))
+ return b"".join((self.tag, LEN_LEN_YYYYMMDDHHMMSSZ, self._encode_time()))
+
+ def _encode1st(self, state):
+ self._assert_ready()
+ vlen = len(self._encode_time())
+ return len(self.tag) + len_size(vlen) + vlen, state
+
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self._encode())
+
class GraphicString(CommonString):
__slots__ = ()
self._assert_ready()
return self._value[1].encode()
+ def _encode1st(self, state):
+ self._assert_ready()
+ return self._value[1].encode1st(state)
+
+ def _encode2nd(self, writer, state_iter):
+ self._value[1].encode2nd(writer, state_iter)
+
def _encode_cer(self, writer):
self._assert_ready()
self._value[1].encode_cer(writer)
return value
return value.encode()
+ def _encode1st(self, state):
+ self._assert_ready()
+ value = self._value
+ if value.__class__ == binary_type:
+ return len(value), state
+ return value.encode1st(state)
+
+ def _encode2nd(self, writer, state_iter):
+ value = self._value
+ if value.__class__ == binary_type:
+ write_full(writer, value)
+ else:
+ value.encode2nd(writer, state_iter)
+
def _encode_cer(self, writer):
self._assert_ready()
value = self._value
# ASN.1 constructed types
########################################################################
-def get_def_by_path(defines_by_path, sub_decode_path):
- """Get define by decode path
- """
- for path, define in defines_by_path:
- if len(path) != len(sub_decode_path):
- continue
- for p1, p2 in zip(path, sub_decode_path):
- if (not p1 is any) and (p1 != p2):
- break
- else:
- return define
-
-
def abs_decode_path(decode_path, rel_path):
"""Create an absolute decode path from current and relative ones
)
-class Sequence(Obj):
+class SequenceEncode1stMixing(object):
+ def _encode1st(self, state):
+ state.append(0)
+ idx = len(state) - 1
+ vlen = 0
+ for v in self._values_for_encoding():
+ l, _ = v.encode1st(state)
+ vlen += l
+ state[idx] = vlen
+ return len(self.tag) + len_size(vlen) + vlen, state
+
+
+class Sequence(SequenceEncode1stMixing, Obj):
"""``SEQUENCE`` structure type
You have to make specification of sequence::
defaulted values existence validation by setting
``"allow_default_values": True`` :ref:`context <ctx>` option.
- .. warning::
-
- Check for default value existence is not performed in
- ``evgen_mode``, because previously decoded values are not stored
- in memory, to be able to compare them.
+ All values with DEFAULT specified are decoded atomically in
+ :ref:`evgen mode <evgen_mode>`. If DEFAULT value is some kind of
+ SEQUENCE, then it will be yielded as a single element, not
+ disassembled. That is required for DEFAULT existence check.
Two sequences are equal if they have equal specification (schema),
implicit/explicit tagging and the same values.
v = b"".join(v.encode() for v in self._values_for_encoding())
return b"".join((self.tag, len_encode(len(v)), v))
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self.tag + len_encode(next(state_iter)))
+ for v in self._values_for_encoding():
+ v.encode2nd(writer, state_iter)
+
def _encode_cer(self, writer):
write_full(writer, self.tag + LENINDEF)
for v in self._values_for_encoding():
len(v) == 0
):
continue
+ spec_defaulted = spec.default is not None
sub_decode_path = decode_path + (name,)
try:
- if evgen_mode:
+ if evgen_mode and not spec_defaulted:
for _decode_path, value, v_tail in spec.decode_evgen(
v,
sub_offset,
vlen += value_len
sub_offset += value_len
v = v_tail
- if not evgen_mode:
- if spec.default is not None and value == spec.default:
- # This will not work in evgen_mode
+ if spec_defaulted:
+ if evgen_mode:
+ yield sub_decode_path, value, v_tail
+ if value == spec.default:
if ctx_bered or ctx_allow_default_values:
ber_encoded = True
else:
decode_path=sub_decode_path,
offset=sub_offset,
)
+ if not evgen_mode:
values[name] = value
spec_defines = getattr(spec, "defines", ())
if len(spec_defines) == 0:
yield pp
-class Set(Sequence):
+class Set(Sequence, SequenceEncode1stMixing):
"""``SET`` structure type
Its usage is identical to :py:class:`pyderasn.Sequence`.
tag_default = tag_encode(form=TagFormConstructed, num=17)
asn1_type_name = "SET"
- def _encode(self):
- v = b"".join(value.encode() for value in sorted(
- self._values_for_encoding(),
+ def _values_for_encoding(self):
+ return sorted(
+ super(Set, self)._values_for_encoding(),
key=attrgetter("tag_order"),
- ))
- return b"".join((self.tag, len_encode(len(v)), v))
+ )
def _encode_cer(self, writer):
write_full(writer, self.tag + LENINDEF)
for v in sorted(
- self._values_for_encoding(),
+ super(Set, self)._values_for_encoding(),
key=attrgetter("tag_order_cer"),
):
v.encode_cer(writer)
decode_path=decode_path,
offset=offset,
)
- if evgen_mode:
+ spec_defaulted = spec.default is not None
+ if evgen_mode and not spec_defaulted:
for _decode_path, value, v_tail in spec.decode_evgen(
v,
sub_offset,
decode_path=sub_decode_path,
offset=sub_offset,
)
- if spec.default is None or value != spec.default:
- pass
- elif ctx_bered or ctx_allow_default_values:
- ber_encoded = True
- else:
- raise DecodeError(
- "DEFAULT value met",
- klass=self.__class__,
- decode_path=sub_decode_path,
- offset=sub_offset,
- )
+ if spec_defaulted:
+ if evgen_mode:
+ yield sub_decode_path, value, v_tail
+ if value != spec.default:
+ pass
+ elif ctx_bered or ctx_allow_default_values:
+ ber_encoded = True
+ else:
+ raise DecodeError(
+ "DEFAULT value met",
+ klass=self.__class__,
+ decode_path=sub_decode_path,
+ offset=sub_offset,
+ )
values[name] = value
del _specs_items[name]
tag_order_prev = value_tag_order
)
-class SequenceOf(Obj):
+class SequenceOf(SequenceEncode1stMixing, Obj):
"""``SEQUENCE OF`` sequence type
For that kind of type you must specify the object it will carry on
>>> ints
Ints SEQUENCE OF[INTEGER 123, INTEGER 345]
- Also you can initialize sequence with preinitialized values:
+ You can initialize sequence with preinitialized values:
>>> ints = Ints([Integer(123), Integer(234)])
+
+ Also you can use iterator as a value:
+
+ >>> ints = Ints(iter(Integer(i) for i in range(1000000)))
+
+ And it won't be iterated until encoding process. Pay attention that
+ bounds and required schema checks are done only during the encoding
+ process in that case! After encode was called, then value is zeroed
+ back to empty list and you have to set it again. That mode is useful
+ mainly with CER encoding mode, where all objects from the iterable
+ will be streamed to the buffer, without copying all of them to
+ memory first.
"""
__slots__ = ("spec", "_bound_min", "_bound_max")
tag_default = tag_encode(form=TagFormConstructed, num=16)
self._value = copy(default_obj._value)
def _value_sanitize(self, value):
+ iterator = False
if issubclass(value.__class__, SequenceOf):
value = value._value
+ elif hasattr(value, NEXT_ATTR_NAME):
+ iterator = True
elif hasattr(value, "__iter__"):
value = list(value)
else:
- raise InvalidValueType((self.__class__, iter))
- if not self._bound_min <= len(value) <= self._bound_max:
- raise BoundsError(self._bound_min, len(value), self._bound_max)
- for v in value:
- if not isinstance(v, self.spec.__class__):
- raise InvalidValueType((self.spec.__class__,))
+ raise InvalidValueType((self.__class__, iter, "iterator"))
+ if not iterator:
+ if not self._bound_min <= len(value) <= self._bound_max:
+ raise BoundsError(self._bound_min, len(value), self._bound_max)
+ class_expected = self.spec.__class__
+ for v in value:
+ if not isinstance(v, class_expected):
+ raise InvalidValueType((class_expected,))
return value
@property
def ready(self):
+ if hasattr(self._value, NEXT_ATTR_NAME):
+ return True
+ if self._bound_min > 0 and len(self._value) == 0:
+ return False
return all(v.ready for v in self._value)
@property
return any(v.bered for v in self._value)
def __getstate__(self):
+ if hasattr(self._value, NEXT_ATTR_NAME):
+ raise ValueError("can not pickle SequenceOf with iterator")
return SequenceOfState(
__version__,
self.tag,
self._value.append(value)
def __iter__(self):
- self._assert_ready()
return iter(self._value)
def __len__(self):
- self._assert_ready()
return len(self._value)
def __setitem__(self, key, value):
return iter(self._value)
def _encode(self):
- v = b"".join(v.encode() for v in self._values_for_encoding())
- return b"".join((self.tag, len_encode(len(v)), v))
+ iterator = hasattr(self._value, NEXT_ATTR_NAME)
+ if iterator:
+ values = []
+ values_append = values.append
+ class_expected = self.spec.__class__
+ values_for_encoding = self._values_for_encoding()
+ self._value = []
+ for v in values_for_encoding:
+ if not isinstance(v, class_expected):
+ raise InvalidValueType((class_expected,))
+ values_append(v.encode())
+ if not self._bound_min <= len(values) <= self._bound_max:
+ raise BoundsError(self._bound_min, len(values), self._bound_max)
+ value = b"".join(values)
+ else:
+ value = b"".join(v.encode() for v in self._values_for_encoding())
+ return b"".join((self.tag, len_encode(len(value)), value))
+
+ def _encode1st(self, state):
+ state = super(SequenceOf, self)._encode1st(state)
+ if hasattr(self._value, NEXT_ATTR_NAME):
+ self._value = []
+ return state
+
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self.tag + len_encode(next(state_iter)))
+ iterator = hasattr(self._value, NEXT_ATTR_NAME)
+ if iterator:
+ values_count = 0
+ class_expected = self.spec.__class__
+ values_for_encoding = self._values_for_encoding()
+ self._value = []
+ for v in values_for_encoding:
+ if not isinstance(v, class_expected):
+ raise InvalidValueType((class_expected,))
+ v.encode2nd(writer, state_iter)
+ values_count += 1
+ if not self._bound_min <= values_count <= self._bound_max:
+ raise BoundsError(self._bound_min, values_count, self._bound_max)
+ else:
+ for v in self._values_for_encoding():
+ v.encode2nd(writer, state_iter)
def _encode_cer(self, writer):
write_full(writer, self.tag + LENINDEF)
- for v in self._values_for_encoding():
- v.encode_cer(writer)
+ iterator = hasattr(self._value, NEXT_ATTR_NAME)
+ if iterator:
+ class_expected = self.spec.__class__
+ values_count = 0
+ values_for_encoding = self._values_for_encoding()
+ self._value = []
+ for v in values_for_encoding:
+ if not isinstance(v, class_expected):
+ raise InvalidValueType((class_expected,))
+ v.encode_cer(writer)
+ values_count += 1
+ if not self._bound_min <= values_count <= self._bound_max:
+ raise BoundsError(self._bound_min, values_count, self._bound_max)
+ else:
+ for v in self._values_for_encoding():
+ v.encode_cer(writer)
write_full(writer, EOC)
def _decode(
tag_default = tag_encode(form=TagFormConstructed, num=17)
asn1_type_name = "SET OF"
+ def _value_sanitize(self, value):
+ value = super(SetOf, self)._value_sanitize(value)
+ if hasattr(value, NEXT_ATTR_NAME):
+ raise ValueError(
+ "SetOf does not support iterator values, as no sense in them"
+ )
+ return value
+
def _encode(self):
v = b"".join(sorted(v.encode() for v in self._values_for_encoding()))
return b"".join((self.tag, len_encode(len(v)), v))
+ def _encode2nd(self, writer, state_iter):
+ write_full(writer, self.tag + len_encode(next(state_iter)))
+ values = []
+ for v in self._values_for_encoding():
+ buf = BytesIO()
+ v.encode2nd(buf.write, state_iter)
+ values.append(buf.getvalue())
+ values.sort()
+ for v in values:
+ write_full(writer, v)
+
def _encode_cer(self, writer):
write_full(writer, self.tag + LENINDEF)
for v in sorted(encode_cer(v) for v in self._values_for_encoding()):
with_colours=False,
with_decode_path=False,
decode_path_only=(),
+ decode_path=(),
):
def _pprint_pps(pps):
for pp in pps:
else:
for row in _pprint_pps(pp):
yield row
- return "\n".join(_pprint_pps(obj.pps()))
+ return "\n".join(_pprint_pps(obj.pps(decode_path)))
return SEQUENCEOF(), pprint_any
def main(): # pragma: no cover
import argparse
- parser = argparse.ArgumentParser(description="PyDERASN ASN.1 BER/DER decoder")
+ parser = argparse.ArgumentParser(description="PyDERASN ASN.1 BER/CER/DER decoder")
parser.add_argument(
"--skip",
type=int,
[obj_by_path(_path) for _path in (args.oids or "").split(",")]
if args.oids else ()
)
+ from functools import partial
if args.schema:
schema = obj_by_path(args.schema)
- from functools import partial
pprinter = partial(pprint, big_blobs=True)
else:
schema, pprinter = generic_decoder()
if __name__ == "__main__":
+ from pyderasn import *
main()