Skip to content

io

ascii

Helpers for loading numeric data from ASCII files.

extract_data_paths_from_dir(dir_path, file_pattern='*')

List data files in a directory and return their sorted paths.

Hidden files (names starting with '.' or '__') are excluded. The returned paths are sorted lexicographically by file name.

Parameters:

Name Type Description Default
dir_path str | Path

Path to the directory containing data files.

required
file_pattern str

Glob pattern to filter files (e.g. '*.dat', '*.xye').

'*'

Returns:

Type Description
list[str]

Sorted absolute paths to the matching data files.

Raises:

Type Description
FileNotFoundError

If dir_path does not exist or is not a directory.

ValueError

If no matching data files are found.

Source code in src/easydiffraction/io/ascii.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def extract_data_paths_from_dir(
    dir_path: str | Path,
    file_pattern: str = '*',
) -> list[str]:
    """
    List data files in a directory and return their sorted paths.

    Hidden files (names starting with ``'.'`` or ``'__'``) are excluded.
    The returned paths are sorted lexicographically by file name.

    Parameters
    ----------
    dir_path : str | Path
        Path to the directory containing data files.
    file_pattern : str, default='*'
        Glob pattern to filter files (e.g. ``'*.dat'``, ``'*.xye'``).

    Returns
    -------
    list[str]
        Sorted absolute paths to the matching data files.

    Raises
    ------
    FileNotFoundError
        If *dir_path* does not exist or is not a directory.
    ValueError
        If no matching data files are found.
    """
    dir_path = Path(dir_path)
    if not dir_path.is_dir():
        msg = f'Directory not found: {dir_path}'
        raise FileNotFoundError(msg)

    paths = sorted(
        str(p)
        for p in dir_path.glob(file_pattern)
        if p.is_file() and not p.name.startswith('.') and not p.name.startswith('__')
    )

    if not paths:
        msg = f"No files matching '{file_pattern}' found in directory: {dir_path}"
        raise ValueError(msg)

    return paths

extract_data_paths_from_zip(zip_path, destination=None)

Extract all files from a ZIP archive and return their paths.

Files are extracted into destination when provided, or into a temporary directory that persists for the lifetime of the process. The returned paths are sorted lexicographically by file name so that numbered data files (e.g. scan_001.dat, scan_002.dat) appear in natural order. Hidden files and directories (names starting with '.' or '__') are excluded.

Parameters:

Name Type Description Default
zip_path str | Path

Path to the ZIP archive.

required
destination str | Path | None

Directory to extract files into. When None, a temporary directory is created.

None

Returns:

Type Description
list[str]

Sorted absolute paths to the extracted data files.

Raises:

Type Description
FileNotFoundError

If zip_path does not exist.

ValueError

If the archive contains no usable data files.

Source code in src/easydiffraction/io/ascii.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def extract_data_paths_from_zip(
    zip_path: str | Path,
    destination: str | Path | None = None,
) -> list[str]:
    """
    Extract all files from a ZIP archive and return their paths.

    Files are extracted into *destination* when provided, or into a
    temporary directory that persists for the lifetime of the process.
    The returned paths are sorted lexicographically by file name so that
    numbered data files (e.g. ``scan_001.dat``, ``scan_002.dat``) appear
    in natural order. Hidden files and directories (names starting with
    ``'.'`` or ``'__'``) are excluded.

    Parameters
    ----------
    zip_path : str | Path
        Path to the ZIP archive.
    destination : str | Path | None, default=None
        Directory to extract files into.  When ``None``, a temporary
        directory is created.

    Returns
    -------
    list[str]
        Sorted absolute paths to the extracted data files.

    Raises
    ------
    FileNotFoundError
        If *zip_path* does not exist.
    ValueError
        If the archive contains no usable data files.
    """
    zip_path = Path(zip_path)
    if not zip_path.exists():
        msg = f'ZIP file not found: {zip_path}'
        raise FileNotFoundError(msg)

    if destination is not None:
        extract_dir = Path(destination)
        extract_dir.mkdir(parents=True, exist_ok=True)
    else:
        # TODO: Unify mkdir with other uses in the code
        extract_dir = Path(tempfile.mkdtemp(prefix='ed_zip_'))

    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(extract_dir)

    paths = sorted(
        str(p)
        for p in extract_dir.rglob('*')
        if p.is_file() and not p.name.startswith('.') and not p.name.startswith('__')
    )

    if not paths:
        msg = f'No data files found in ZIP archive: {zip_path}'
        raise ValueError(msg)

    return paths

extract_metadata(file_path, pattern)

Extract a single numeric value from a file using a regex pattern.

The entire file content is searched (not just the header). The first match is used. The regex must contain exactly one capture group whose match is convertible to float.

Parameters:

Name Type Description Default
file_path str | Path

Path to the input file.

required
pattern str

Regex with one capture group that matches the numeric value.

required

Returns:

Type Description
float | None

The extracted value, or None if the pattern did not match or the captured text could not be converted to float.

Source code in src/easydiffraction/io/ascii.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def extract_metadata(
    file_path: str | Path,
    pattern: str,
) -> float | None:
    """
    Extract a single numeric value from a file using a regex pattern.

    The entire file content is searched (not just the header).  The
    **first** match is used.  The regex must contain exactly one capture
    group whose match is convertible to ``float``.

    Parameters
    ----------
    file_path : str | Path
        Path to the input file.
    pattern : str
        Regex with one capture group that matches the numeric value.

    Returns
    -------
    float | None
        The extracted value, or ``None`` if the pattern did not match or
        the captured text could not be converted to float.
    """
    content = Path(file_path).read_text(encoding='utf-8', errors='ignore')
    match = re.search(pattern, content, re.MULTILINE)
    if match is None:
        return None
    try:
        return float(match.group(1))
    except (ValueError, IndexError):
        return None

extract_project_from_zip(zip_path, destination=None)

Extract a project directory from a ZIP archive.

The archive must contain exactly one directory with a project.cif file. Files are extracted into destination when provided, or into a temporary directory that persists for the lifetime of the process.

Parameters:

Name Type Description Default
zip_path str | Path

Path to the ZIP archive containing the project.

required
destination str | Path | None

Directory to extract into. When None, a temporary directory is created.

None

Returns:

Type Description
str

Absolute path to the extracted project directory (the directory that contains project.cif).

Raises:

Type Description
FileNotFoundError

If zip_path does not exist.

ValueError

If the archive does not contain a project.cif file.

Source code in src/easydiffraction/io/ascii.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def extract_project_from_zip(
    zip_path: str | Path,
    destination: str | Path | None = None,
) -> str:
    """
    Extract a project directory from a ZIP archive.

    The archive must contain exactly one directory with a
    ``project.cif`` file.  Files are extracted into *destination* when
    provided, or into a temporary directory that persists for the
    lifetime of the process.

    Parameters
    ----------
    zip_path : str | Path
        Path to the ZIP archive containing the project.
    destination : str | Path | None, default=None
        Directory to extract into.  When ``None``, a temporary directory
        is created.

    Returns
    -------
    str
        Absolute path to the extracted project directory (the directory
        that contains ``project.cif``).

    Raises
    ------
    FileNotFoundError
        If *zip_path* does not exist.
    ValueError
        If the archive does not contain a ``project.cif`` file.
    """
    zip_path = Path(zip_path)
    if not zip_path.exists():
        msg = f'ZIP file not found: {zip_path}'
        raise FileNotFoundError(msg)

    if destination is not None:
        extract_dir = Path(destination)
        extract_dir.mkdir(parents=True, exist_ok=True)
    else:
        extract_dir = Path(tempfile.mkdtemp(prefix='ed_zip_'))

    with zipfile.ZipFile(zip_path, 'r') as zf:
        # Determine the project directory from the archive contents
        # *before* extraction, so we are not confused by unrelated
        # project.cif files already present in the destination.
        project_cif_entries = [name for name in zf.namelist() if name.endswith('project.cif')]
        if not project_cif_entries:
            msg = f'No project.cif found in ZIP archive: {zip_path}'
            raise ValueError(msg)

        zf.extractall(extract_dir)

    project_cif_path = extract_dir / project_cif_entries[0]
    return str(project_cif_path.parent.resolve())

load_numeric_block(data_path)

Load a numeric block from an ASCII file, skipping non-numeric lines.

Each line is tested individually: lines whose whitespace-separated tokens are all valid floats are kept; everything else (headers, footers, comment lines) is silently discarded.

Parameters:

Name Type Description Default
data_path str | Path

Path to the ASCII data file.

required

Returns:

Type Description
ndarray

2-D array of the parsed numeric data.

Raises:

Type Description
OSError

If no numeric lines can be found in the file.

Source code in src/easydiffraction/io/ascii.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def load_numeric_block(data_path: str | Path) -> np.ndarray:
    """
    Load a numeric block from an ASCII file, skipping non-numeric lines.

    Each line is tested individually: lines whose whitespace-separated
    tokens are all valid floats are kept; everything else (headers,
    footers, comment lines) is silently discarded.

    Parameters
    ----------
    data_path : str | Path
        Path to the ASCII data file.

    Returns
    -------
    np.ndarray
        2-D array of the parsed numeric data.

    Raises
    ------
    OSError
        If no numeric lines can be found in the file.
    """
    data_path = Path(data_path)
    lines = data_path.read_text().splitlines()

    numeric_lines: list[str] = []
    for line in lines:
        tokens = line.split()
        if not tokens:
            continue
        try:
            for token in tokens:
                float(token)
        except ValueError:
            continue
        numeric_lines.append(line)

    if not numeric_lines:
        msg = f'Failed to read numeric data from {data_path}: no numeric lines found'
        raise OSError(msg)

    return np.loadtxt(StringIO('\n'.join(numeric_lines)))

cif

handler

Minimal CIF tag handler used by descriptors/parameters.

CifHandler

Canonical CIF handler used by descriptors/parameters.

Holds CIF tags (names) and attaches to an owning descriptor so it can derive a stable uid if needed.

Source code in src/easydiffraction/io/cif/handler.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class CifHandler:
    """
    Canonical CIF handler used by descriptors/parameters.

    Holds CIF tags (names) and attaches to an owning descriptor so it
    can derive a stable uid if needed.
    """

    def __init__(self, *, names: list[str]) -> None:
        self._names = names
        self._owner = None  # set by attach

    def attach(self, owner: object) -> None:
        """Attach to a descriptor or parameter instance."""
        self._owner = owner

    @property
    def names(self) -> list[str]:
        """List of CIF tag names associated with the owner."""
        return self._names

    @property
    def uid(self) -> str | None:
        """Unique identifier taken from the owner, if attached."""
        if self._owner is None:
            return None
        return self._owner.unique_name
attach(owner)

Attach to a descriptor or parameter instance.

Source code in src/easydiffraction/io/cif/handler.py
20
21
22
def attach(self, owner: object) -> None:
    """
    Remember *owner* as the object this handler belongs to.

    The reference is stored on ``self._owner``; nothing is returned.
    """
    self._owner = owner
names property

List of CIF tag names associated with the owner.

uid property

Unique identifier taken from the owner, if attached.

parse

document_from_path(path)

Read a CIF document from a file path.

Source code in src/easydiffraction/io/cif/parse.py
12
13
14
def document_from_path(path: str) -> gemmi.cif.Document:
    """
    Parse the CIF file at *path* into a gemmi document.

    Thin wrapper that forwards *path* to ``gemmi.cif.read_file``.
    """
    document = gemmi.cif.read_file(path)
    return document

document_from_string(text)

Read a CIF document from a raw text string.

Source code in src/easydiffraction/io/cif/parse.py
17
18
19
def document_from_string(text: str) -> gemmi.cif.Document:
    """
    Parse raw CIF *text* into a gemmi document.

    Thin wrapper that forwards *text* to ``gemmi.cif.read_string``.
    """
    document = gemmi.cif.read_string(text)
    return document

name_from_block(block)

Extract a model name from the CIF block name.

Source code in src/easydiffraction/io/cif/parse.py
27
28
29
30
def name_from_block(block: gemmi.cif.Block) -> str:
    """
    Return the name of a CIF data block, used as the model name.

    The block name is returned unchanged.
    """
    # TODO: Need validator or normalization?
    model_name = block.name
    return model_name

pick_sole_block(doc)

Pick the sole data block from a CIF document.

Source code in src/easydiffraction/io/cif/parse.py
22
23
24
def pick_sole_block(doc: gemmi.cif.Document) -> gemmi.cif.Block:
    """
    Return the single data block of a CIF document.

    Delegates to ``doc.sole_block()``.
    """
    only_block = doc.sole_block()
    return only_block

read_cif_str(block, tag)

Read a single string value from a CIF block by tag.

Strips surrounding single or double quotes when present, and returns None for absent tags or CIF unknown/inapplicable markers (? / .).

Parameters:

Name Type Description Default
block Block

Parsed CIF data block to read from.

required
tag str

CIF tag to look up (e.g. '_peak.profile_type').

required

Returns:

Type Description
str | None

Unquoted string value, or None if not found.

Source code in src/easydiffraction/io/cif/parse.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def read_cif_str(block: gemmi.cif.Block, tag: str) -> str | None:
    """
    Look up *tag* in a CIF block and return its first value as text.

    A matching pair of surrounding single or double quotes is removed.
    ``None`` is returned when the tag is absent or when the value is one
    of the CIF placeholders ``?`` (unknown) or ``.`` (inapplicable).

    Parameters
    ----------
    block : gemmi.cif.Block
        Parsed CIF data block to read from.
    tag : str
        CIF tag to look up (e.g. ``'_peak.profile_type'``).

    Returns
    -------
    str | None
        Unquoted string value, or ``None`` if not found.
    """
    values = list(block.find_values(tag))
    if len(values) == 0:
        return None

    # Only the first value is considered.
    text: str = values[0]
    if text in ('?', '.'):
        return None

    # Strip quotes only when the value is long enough to hold both an
    # opening and a closing quote and the two characters agree.
    if len(text) >= _MIN_QUOTED_LEN and text[0] in ("'", '"') and text[-1] == text[0]:
        return text[1:-1]
    return text

serialize

analysis_from_cif(analysis, cif_text)

Populate an Analysis instance from CIF text.

Reads the fitting engine, fit mode, aliases, constraints, and joint-fit experiment weights from the given CIF string.

Parameters:

Name Type Description Default
analysis object

The Analysis instance to populate.

required
cif_text str

CIF text content of analysis.cif.

required
Source code in src/easydiffraction/io/cif/serialize.py
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
def analysis_from_cif(analysis: object, cif_text: str) -> None:
    """
    Restore the state of an Analysis instance from CIF text.

    The text is wrapped in a data block named ``analysis`` and parsed;
    the fitting engine, fit mode, aliases, constraints, and joint-fit
    experiment weights are then read from the single resulting block,
    in that order.

    Parameters
    ----------
    analysis : object
        The ``Analysis`` instance to populate.
    cif_text : str
        CIF text content of ``analysis.cif``.
    """
    import gemmi  # noqa: PLC0415

    wrapped_text = _wrap_in_data_block(cif_text, 'analysis')
    block = gemmi.cif.read_string(wrapped_text).sole_block()
    read_tag = _make_cif_string_reader(block)

    # Fitting engine: replace the fitter only when the tag has a value.
    engine_name = read_tag('_analysis.fitting_engine')
    if engine_name is not None:
        from easydiffraction.analysis.fitting import Fitter  # noqa: PLC0415

        analysis.fitter = Fitter(engine_name)

    # Fit mode (single item)
    analysis.fit_mode.from_cif(block)

    # Aliases (loop)
    analysis.aliases.from_cif(block)

    # Constraints (loop); enabled only if any were actually loaded
    constraints = analysis.constraints
    constraints.from_cif(block)
    if constraints._items:
        constraints.enable()

    # Joint-fit experiment weights (loop)
    analysis._joint_fit_experiments.from_cif(block)

analysis_to_cif(analysis)

Render analysis metadata, aliases, and constraints to CIF.

Source code in src/easydiffraction/io/cif/serialize.py
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
def analysis_to_cif(analysis: object) -> str:
    """
    Serialise the analysis settings to CIF text.

    The output contains the fitting-engine tag, the fit mode, the
    aliases and the constraints. The joint-fit experiments section is
    appended only when its CIF text is non-empty.
    """
    engine_text = format_value(analysis.current_minimizer)
    sections: list[str] = [
        f'_analysis.fitting_engine  {engine_text}',
        analysis.fit_mode.as_cif,
        '',
        analysis.aliases.as_cif,
        '',
        analysis.constraints.as_cif,
    ]

    joint_fit_text = analysis.joint_fit_experiments.as_cif
    if joint_fit_text:
        sections += ['', joint_fit_text]

    return '\n'.join(sections)

category_collection_from_cif(self, block)

Populate a CategoryCollection from a CIF loop.

Parameters:

Name Type Description Default
self CategoryCollection

The collection instance to populate.

required
block Block

Parsed CIF block to read the loop from.

required

Raises:

Type Description
ValueError

If the collection has no _item_type defined.

Source code in src/easydiffraction/io/cif/serialize.py
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
def category_collection_from_cif(
    self: CategoryCollection,
    block: gemmi.cif.Block,
) -> None:
    """
    Populate a CategoryCollection from a CIF loop.

    The loop belonging to this category is located in *block*; one
    default item is created per loop row and each item parameter is set
    from the first of its CIF names that appears among the loop tags.
    When no loop is found the collection is left untouched.

    Parameters
    ----------
    self : CategoryCollection
        The collection instance to populate.
    block : gemmi.cif.Block
        Parsed CIF block to read the loop from.

    Raises
    ------
    ValueError
        If the collection has no ``_item_type`` defined.
    """
    # TODO: Find a better way and then remove TODO in the AtomSite
    #  class
    # TODO: Rename to _item_cls?
    if self._item_type is None:
        msg = 'Child class is not defined.'
        raise ValueError(msg)

    # Create a temporary instance to access its parameters and
    # parameter CIF names
    category_item = self._item_type()

    # Iterate over category parameters and their possible CIF names
    # trying to find the whole loop it belongs to inside the CIF block
    loop = _find_loop_for_category(block, category_item)

    # If no loop found
    if loop is None:
        log.debug(f'No loop found for category {self}.')
        return

    # Get 2D array of loop values (as strings)
    num_rows = loop.length()
    num_cols = loop.width()
    array = np.array(loop.values, dtype=str).reshape(num_rows, num_cols)

    # Resolve tag -> column once instead of searching loop.tags for
    # every row/parameter/name combination. setdefault keeps the first
    # occurrence of a tag, matching what list.index() would return.
    column_of: dict[str, int] = {}
    for col_idx, tag in enumerate(loop.tags):
        column_of.setdefault(tag, col_idx)

    # Pre-create default items in the collection
    self._items = [self._item_type() for _ in range(num_rows)]

    # Set parent for each item to enable identity resolution
    for item in self._items:
        object.__setattr__(item, '_parent', self)  # noqa: PLC2801

    # Set those items' parameters, which are present in the loop
    for current_item, row in zip(self._items, array, strict=True):
        for param in current_item.parameters:
            for cif_name in param._cif_handler.names:
                col_idx = column_of.get(cif_name)
                if col_idx is not None:
                    # TODO: The following is duplication of
                    #  param_from_cif
                    _set_param_from_raw_cif_value(param, row[col_idx])
                    # Only the first matching CIF name is used.
                    break

category_collection_to_cif(collection, max_display=None)

Render a CategoryCollection-like object to CIF text.

Uses first item to build loop header, then emits rows for each item.

Parameters:

Name Type Description Default
collection object

A CategoryCollection-like object.

required
max_display int | None

When set to a positive integer, truncate the output to at most this many rows (half from the start, half from the end) with an ... separator. None emits all rows.

None

Returns:

Type Description
str

CIF text representing the collection as a loop.

Source code in src/easydiffraction/io/cif/serialize.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def category_collection_to_cif(
    collection: object,
    max_display: int | None = None,
) -> str:
    """
    Render a CategoryCollection-like object to CIF text.

    Uses first item to build loop header, then emits rows for each item.

    Parameters
    ----------
    collection : object
        A ``CategoryCollection``-like object.
    max_display : int | None, default=None
        When set to a positive integer, truncate the output to at most
        this many rows (half from the start, half from the end) with an
        ``...`` separator.  ``None`` emits all rows.

    Returns
    -------
    str
        CIF text representing the collection as a loop.
    """
    if not len(collection):
        return ''

    # Allow collections to conditionally suppress CIF output
    skip = getattr(collection, '_skip_cif_serialization', None)
    if skip is not None and skip():
        return ''

    lines: list[str] = []

    # Header — use first item's CIF tag names as the canonical columns
    first_item = next(iter(collection.values()))
    lines.append('loop_')
    header_tags: list[str] = []
    for p in first_item.parameters:
        tags = p._cif_handler.names  # type: ignore[attr-defined]
        header_tags.append(tags[0])
        lines.append(tags[0])

    # Allow collections to customise per-item row formatting
    row_hook = getattr(collection, '_format_cif_row', None)

    def _row(item: object) -> list[str]:
        if row_hook is not None:
            override = row_hook(item)
            if override is not None:
                return override
        return [format_param_value(p) for p in item.parameters]

    items = list(collection.values())
    lines.extend(_emit_loop_rows(items, _row, header_tags, max_display))

    return '\n'.join(lines)

category_item_from_cif(self, block, idx=0)

Populate each parameter from CIF block at given loop index.

Source code in src/easydiffraction/io/cif/serialize.py
563
564
565
566
567
568
569
570
def category_item_from_cif(
    self: CategoryItem,
    block: gemmi.cif.Block,
    idx: int = 0,
) -> None:
    """
    Load every parameter of the item from a CIF block.

    Each parameter's own ``from_cif`` is called with the same *block*
    and the loop row index *idx*.
    """
    for parameter in self.parameters:
        parameter.from_cif(block, idx=idx)

category_item_to_cif(item)

Render a CategoryItem-like object to CIF text.

Expects item.parameters iterable of params with _cif_handler.names and value.

Source code in src/easydiffraction/io/cif/serialize.py
138
139
140
141
142
143
144
145
146
def category_item_to_cif(item: object) -> str:
    """
    Serialise a CategoryItem-like object to CIF text.

    Every entry of ``item.parameters`` is rendered with
    ``param_to_cif`` and the results are joined with newlines, one
    parameter per line.
    """
    return '\n'.join(param_to_cif(parameter) for parameter in item.parameters)

datablock_collection_to_cif(collection)

Render a collection of datablocks by joining their CIF blocks.

Source code in src/easydiffraction/io/cif/serialize.py
289
290
291
def datablock_collection_to_cif(collection: object) -> str:
    """
    Serialise all datablocks of a collection to one CIF text.

    The ``as_cif`` text of every value in the collection is taken in
    iteration order and the pieces are separated by one blank line.
    """
    block_texts = (datablock.as_cif for datablock in collection.values())
    return '\n\n'.join(block_texts)

datablock_item_to_cif(datablock, max_loop_display=None)

Render a DatablockItem-like object to CIF text.

Emits a data_ header and then concatenates category CIF sections.

Parameters:

Name Type Description Default
datablock object

A DatablockItem-like object.

required
max_loop_display int | None

When set, truncate loop categories to this many rows. None emits all rows (used for serialisation).

None

Returns:

Type Description
str

CIF text representing the datablock (a data_ header followed by its category sections).

Source code in src/easydiffraction/io/cif/serialize.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def datablock_item_to_cif(
    datablock: object,
    max_loop_display: int | None = None,
) -> str:
    """
    Serialise a DatablockItem-like object to CIF text.

    The output starts with a ``data_<name>`` header, followed by the CIF
    text of every ``CategoryItem`` attribute and then of every
    ``CategoryCollection`` attribute of the datablock. Sections are
    separated by one blank line.

    Parameters
    ----------
    datablock : object
        A ``DatablockItem``-like object.
    max_loop_display : int | None, default=None
        When set, truncate loop categories to this many rows. ``None``
        emits all rows (used for serialisation).

    Returns
    -------
    str
        CIF text representing the datablock.
    """
    # Imported here rather than at module level to avoid import cycles
    from easydiffraction.core.category import CategoryCollection  # noqa: PLC0415
    from easydiffraction.core.category import CategoryItem  # noqa: PLC0415

    sections: list[str] = [f'data_{datablock._identity.datablock_entry_name}']

    # Single-row categories come first ...
    for attribute in vars(datablock).values():
        if isinstance(attribute, CategoryItem):
            sections.append(attribute.as_cif)

    # ... followed by the loop categories
    for attribute in vars(datablock).values():
        if isinstance(attribute, CategoryCollection):
            loop_text = category_collection_to_cif(attribute, max_display=max_loop_display)
            sections.append(loop_text)

    return '\n\n'.join(sections)

experiment_to_cif(experiment)

Render an experiment: datablock part plus measured data.

Source code in src/easydiffraction/io/cif/serialize.py
339
340
341
def experiment_to_cif(experiment: object) -> str:
    """
    Serialise an experiment to CIF text.

    Delegates to ``datablock_item_to_cif`` with its default arguments,
    i.e. loop categories are written without truncation.
    """
    cif_text = datablock_item_to_cif(experiment)
    return cif_text

format_param_value(param)

Format a parameter value for CIF output, encoding the free flag.

CIF convention for numeric parameters:

  • Fixed or constrained parameter: plain value, e.g. 3.89090000
  • Free parameter without uncertainty: value with empty brackets, e.g. 3.89090000()
  • Free parameter with uncertainty: value with esd in brackets, e.g. 3.89090000(200000)

Constrained (dependent) parameters are always written without brackets, even if their free flag is True, because they are not independently varied by the minimizer.

Non-numeric parameters and descriptors without a free attribute are formatted with :func:format_value.

Parameters:

Name Type Description Default
param object

A descriptor or parameter exposing .value and optionally .free, .constrained, and .uncertainty.

required

Returns:

Type Description
str

Formatted CIF value string.

Source code in src/easydiffraction/io/cif/serialize.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def format_param_value(param: object) -> str:
    """
    Render a parameter value for CIF, marking refined parameters.

    Output forms for numeric parameters:

    - fixed or constrained: the plain value, e.g. ``3.89090000``
    - free, no (positive) uncertainty: the value followed by empty
      brackets, e.g. ``3.89090000()``
    - free with a positive uncertainty: the value with the esd in
      brackets, e.g. ``3.89090000(200000)``

    A constrained (dependent) parameter never gets brackets, even when
    its ``free`` flag is ``True``, since the minimizer does not vary it
    independently.

    Anything that is not a ``Parameter``, and any non-numeric value, is
    formatted with :func:`format_value`.

    Parameters
    ----------
    param : object
        A descriptor or parameter exposing ``.value`` and optionally
        ``.free``, ``.constrained``, and ``.uncertainty``.

    Returns
    -------
    str
        Formatted CIF value string.
    """
    from easydiffraction.core.variable import Parameter  # noqa: PLC0415

    # Only Parameter instances carry the free/constrained flags
    if isinstance(param, Parameter):
        is_free = param.free
        is_constrained = param.constrained
    else:
        is_free = False
        is_constrained = False
    value = param.value  # type: ignore[attr-defined]

    # Plain formatting unless this is an independently varied number
    independently_varied = is_free and not is_constrained
    if not (independently_varied and isinstance(value, (int, float))):
        return format_value(value)

    precision = 8
    uncertainty = getattr(param, 'uncertainty', None)

    # Free parameter without a usable esd: value plus empty brackets
    if uncertainty is None or not uncertainty > 0:
        return f'{float(value):.{precision}f}()'

    # Free parameter with esd: shorthand notation of ``uncertainties``
    from uncertainties import ufloat as _ufloat  # noqa: PLC0415

    value_with_esd = _ufloat(float(value), float(uncertainty))
    return f'{value_with_esd:.{precision}fS}'

format_value(value)

Format a single CIF value for output.

Note: The precision must be high enough that the minimizer's finite-difference Jacobian probes (typically ~1e-8 relative) survive the float→string→float round-trip through CIF.

Source code in src/easydiffraction/io/cif/serialize.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def format_value(value: object) -> str:
    """
    Convert one Python value to its CIF text representation.

    ``None`` and blank strings become the CIF unknown marker ``'?'``,
    integers and floats are written in fixed-point notation with eight
    decimals, strings containing a space or a tab are wrapped in double
    quotes, and anything else falls back to ``str(value)``.

    .. note::
        Eight decimals are kept so that the minimizer's
        finite-difference Jacobian probes (typically ~1e-8 relative)
        survive the float→string→float round-trip through CIF.

    Parameters
    ----------
    value : object
        The value to format.

    Returns
    -------
    str
        CIF-ready text for *value*.
    """
    decimals = 8
    unknown_marker = '?'

    if value is None:
        return unknown_marker

    # Integers are promoted to float so they share the float layout.
    if isinstance(value, int):
        return f'{float(value):.{decimals}f}'

    if isinstance(value, float):
        return f'{value:.{decimals}f}'

    if isinstance(value, str):
        # Empty or whitespace-only text carries no information.
        if not value.strip():
            return unknown_marker
        # Embedded blanks would split the token, so quote it.
        if ' ' in value or '\t' in value:
            return f'"{value}"'
        return value

    # No dedicated rule for this type: use its plain string form.
    return str(value)

param_from_cif(self, block, idx=0)

Populate a single descriptor from a CIF block.

Parameters:

Name Type Description Default
self GenericDescriptorBase

The descriptor instance to populate.

required
block Block

Parsed CIF block to read values from.

required
idx int

Row index used when the tag belongs to a loop.

0
Source code in src/easydiffraction/io/cif/serialize.py
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
def param_from_cif(
    self: GenericDescriptorBase,
    block: gemmi.cif.Block,
    idx: int = 0,
) -> None:
    """
    Fill one descriptor with the value stored in a CIF block.

    The descriptor's CIF tag names are tried in order; the first tag
    that yields any values is used. When no tag matches, or the selected
    entry is the CIF unknown (``?``) or inapplicable (``.``) marker, the
    descriptor is left untouched.

    Numeric descriptors are parsed with ``str_to_ufloat``. A value
    written with brackets marks the descriptor as free (if it has a
    ``free`` attribute) and, when the parsed standard deviation is not
    NaN, also sets its ``uncertainty``. String descriptors have one pair
    of matching surrounding quotes removed.

    Parameters
    ----------
    self : GenericDescriptorBase
        The descriptor instance to populate.
    block : gemmi.cif.Block
        Parsed CIF block to read values from.
    idx : int, default=0
        Row index used when the tag belongs to a loop.
    """
    # Walk the tag names in order of preference and stop at the first
    # one that is present in the block.
    for tag in self._cif_handler.names:
        column = list(block.find_values(tag))
        if column:
            break
    else:
        # Nothing found: the descriptor keeps its default value.
        return

    raw = column[idx]

    # Unknown / inapplicable markers carry no value to assign.
    if raw in {'?', '.'}:
        return

    if self._value_type == DataTypes.NUMERIC:
        parsed = str_to_ufloat(raw)
        self.value = parsed.n
        # Brackets in the raw text flag a refined (free) parameter.
        if '(' in raw and hasattr(self, 'free'):
            self.free = True  # type: ignore[attr-defined]
            if not np.isnan(parsed.s) and hasattr(self, 'uncertainty'):
                self.uncertainty = parsed.s  # type: ignore[attr-defined]
        return

    if self._value_type == DataTypes.STRING:
        is_quoted = (
            len(raw) >= _MIN_QUOTED_LEN and raw[0] == raw[-1] and raw[0] in {"'", '"'}
        )
        self.value = raw[1:-1] if is_quoted else raw
        return

    # Any other value type is not handled here.
    log.debug(f'Unrecognized type: {self._value_type}')

param_to_cif(param)

Render a single descriptor/parameter to a CIF line.

Expects param to expose _cif_handler.names and value. Free parameters are written with uncertainty brackets (see format_param_value).

Source code in src/easydiffraction/io/cif/serialize.py
125
126
127
128
129
130
131
132
133
134
135
def param_to_cif(param: object) -> str:
    """
    Build the ``<tag> <value>`` CIF line for one descriptor/parameter.

    The first entry of ``param._cif_handler.names`` is used as the tag,
    and the value text is produced by :func:`format_param_value`.

    Parameters
    ----------
    param : object
        Object exposing ``_cif_handler.names`` and ``value``.

    Returns
    -------
    str
        The tag and the formatted value separated by one space.
    """
    names: Sequence[str] = param._cif_handler.names  # type: ignore[attr-defined]
    return f'{names[0]} {format_param_value(param)}'

project_info_from_cif(info, cif_text)

Populate a ProjectInfo instance from CIF text.

Reads _project.id, _project.title, and _project.description from the given CIF string and sets them on the info object.

Parameters:

Name Type Description Default
info object

The ProjectInfo instance to populate.

required
cif_text str

CIF text content of project.cif.

required
Source code in src/easydiffraction/io/cif/serialize.py
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
def project_info_from_cif(info: object, cif_text: str) -> None:
    """
    Copy project metadata from CIF text onto a ProjectInfo object.

    The text is wrapped in a ``project`` data block and parsed with
    gemmi. Each of ``_project.id``, ``_project.title`` and
    ``_project.description`` that the reader returns a value for is
    assigned to ``info.name``, ``info.title`` and ``info.description``
    respectively; attributes whose tag yields ``None`` are left as-is.

    Parameters
    ----------
    info : object
        The ``ProjectInfo`` instance to populate.
    cif_text : str
        CIF text content of ``project.cif``.
    """
    import gemmi  # noqa: PLC0415

    document = gemmi.cif.read_string(_wrap_in_data_block(cif_text, 'project'))
    read_cif_string = _make_cif_string_reader(document.sole_block())

    # CIF tag -> attribute on *info*, processed in this order.
    tag_to_attribute = (
        ('_project.id', 'name'),
        ('_project.title', 'title'),
        ('_project.description', 'description'),
    )
    for tag, attribute in tag_to_attribute:
        text = read_cif_string(tag)
        if text is not None:
            setattr(info, attribute, text)

project_info_to_cif(info)

Render ProjectInfo to CIF text (id, title, description).

Source code in src/easydiffraction/io/cif/serialize.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
def project_info_to_cif(info: object) -> str:
    """
    Render ProjectInfo to CIF text.

    Writes ``_project.id``, ``_project.title``, ``_project.description``,
    ``_project.created`` and ``_project.last_modified``, one tag per
    line.

    Single-line values are written as follows:

    - empty text becomes the CIF unknown marker ``?`` so that a tag is
      never emitted without a value;
    - text containing a space or a tab is wrapped in single quotes;
    - anything else is written as-is.

    The description is written as a semicolon-delimited text field when
    it is longer than ``_CIF_DESCRIPTION_WRAP_LEN`` or contains a line
    break (quoted CIF strings cannot span lines).

    Parameters
    ----------
    info : object
        Object exposing ``name``, ``title``, ``description``,
        ``_created`` and ``_last_modified``; the two timestamps must
        support ``strftime``.

    Returns
    -------
    str
        The five CIF lines joined by newlines, without a trailing
        newline.
    """

    def _single_line(text: str) -> str:
        """Return *text* as a single-line CIF value."""
        if not text:
            # A bare tag with no value is not valid CIF.
            return '?'
        if ' ' in text or '\t' in text:
            # Whitespace would otherwise split the value in two tokens.
            return f"'{text}'"
        return text

    name = _single_line(f'{info.name}')
    title = _single_line(f'{info.title}')

    raw_description = info.description
    if len(raw_description) > _CIF_DESCRIPTION_WRAP_LEN or '\n' in raw_description:
        # Long or multi-line text goes into a CIF text field.
        description = f'\n;\n{raw_description}\n;'
    else:
        description = _single_line(f'{raw_description}')

    created = f"'{info._created.strftime('%d %b %Y %H:%M:%S')}'"
    last_modified = f"'{info._last_modified.strftime('%d %b %Y %H:%M:%S')}'"

    return (
        f'_project.id               {name}\n'
        f'_project.title            {title}\n'
        f'_project.description      {description}\n'
        f'_project.created          {created}\n'
        f'_project.last_modified    {last_modified}'
    )

project_to_cif(project)

Render a whole project by concatenating sections when present.

Source code in src/easydiffraction/io/cif/serialize.py
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def project_to_cif(project: object) -> str:
    """
    Assemble the CIF text of a whole project from its sections.

    Sections are emitted in the order info, structures, experiments,
    analysis, summary. ``info`` is included whenever the attribute
    exists; the others only when the attribute exists and is truthy.
    ``info``, ``structures`` and ``experiments`` expose ``as_cif`` as an
    attribute, while ``analysis`` and ``summary`` expose it as a
    callable. Empty section texts are dropped before joining.

    Parameters
    ----------
    project : object
        Object that may expose any of the section attributes above.

    Returns
    -------
    str
        Non-empty section texts separated by one blank line.
    """
    sections: list[str] = []

    if hasattr(project, 'info'):
        sections.append(project.info.as_cif)

    structures = getattr(project, 'structures', None)
    if structures:
        sections.append(structures.as_cif)

    experiments = getattr(project, 'experiments', None)
    if experiments:
        sections.append(experiments.as_cif)

    analysis = getattr(project, 'analysis', None)
    if analysis:
        sections.append(analysis.as_cif())

    summary = getattr(project, 'summary', None)
    if summary:
        sections.append(summary.as_cif())

    # Skip sections that rendered to nothing.
    return '\n\n'.join(filter(None, sections))

summary_to_cif(_summary)

Render a summary CIF block (placeholder for now).

Source code in src/easydiffraction/io/cif/serialize.py
362
363
364
def summary_to_cif(_summary: object) -> str:
    """
    Return the placeholder text used for the summary CIF section.

    Parameters
    ----------
    _summary : object
        Currently unused.

    Returns
    -------
    str
        The fixed string ``'To be added...'``.
    """
    placeholder = 'To be added...'
    return placeholder