Documentation for Column.py

Models a chromatographic column in an LC-MS process. This is particularly useful for investigating the effect of retention time (RT) drift on chemicals.

CleanColumn

Bases: Column

A clean column with no RT noise

Source code in vimms/Column.py
104
105
106
107
108
109
110
111
112
113
114
115
class CleanColumn(Column):
    """
    A column that is constructed with a noise standard deviation of zero,
    i.e. a clean column with no RT noise.
    """

    def __init__(self, dataset):
        """
        Create a clean column object.

        Args:
            dataset: the set of Chemicals that passes through this column
        """
        # Passing a zero noise standard deviation is what makes the column "clean".
        no_noise_sd = 0.0
        super().__init__(dataset, no_noise_sd)

__init__(dataset)

Create a clean column object

Parameters:
  • dataset

    the set of Chemicals that passes through this column

Source code in vimms/Column.py
109
110
111
112
113
114
115
def __init__(self, dataset):
    """
    Create a clean column object.

    Args:
        dataset: the set of Chemicals that passes through this column
    """
    # Passing a zero noise standard deviation is what makes the column "clean".
    no_noise_sd = 0.0
    super().__init__(dataset, no_noise_sd)

Column

Defines a base Column class that operates on a dataset and has a noise parameter

Source code in vimms/Column.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
class Column:
    """
    Base column class: holds a dataset of chemicals together with one retention
    time (RT) offset per chemical, drawn using a noise parameter.
    """

    def __init__(self, dataset, noise_sd):
        """
        Create a column object.

        Args:
            dataset: the set of Chemicals that passes through this column
            noise_sd: standard deviation of the normal distribution the RT noise is sampled from
        """
        self.dataset = dataset
        self.noise_sd = noise_sd
        # Cache the RT and apex RT of every chemical as arrays, in dataset order.
        self.dataset_rts = np.array([chem.rt for chem in self.dataset])
        self.dataset_apex_rts = np.array([chem.get_apex_rt() for chem in self.dataset])
        # Offsets are drawn once at construction time and reused afterwards.
        self.offsets, self.true_drift_function = self._get_offsets()

    def _get_offsets(self):
        """
        Compute the per-chemical offsets and the underlying offset function.

        In this base class the offset function is all zeros, so the offsets are
        pure normal noise with standard deviation `noise_sd`.

        Returns: a tuple (offsets, true_offset_function), each with one entry per chemical

        """
        n_chems = len(self.dataset)
        zero_drift = np.zeros(n_chems)
        noise = np.random.normal(0, self.noise_sd, n_chems)
        return zero_drift + noise, zero_drift

    def get_dataset(self):
        """
        Build a copy of the dataset with this column's RT offsets applied.

        The stored chemicals are left untouched: each one is deep-copied before
        its `rt` is shifted.

        Returns: a new list of Chemicals whose RTs have been shifted by the column offsets

        """

        def _apply_offset(position, chem):
            shifted = copy.deepcopy(chem)
            shifted.rt += self.offsets[position]
            return shifted

        return [_apply_offset(position, chem) for position, chem in enumerate(self.dataset)]

    def get_chemical(self, idx):
        """
        Look up the chemical at the given index and add its offset to it.

        Args:
            idx: the index into the dataset (and into the offsets)

        Returns: the result of `dataset[idx] + offsets[idx]`

        """
        # NOTE(review): this adds a number to the dataset entry itself rather than to
        # its rt, so it relies on the entry supporting `+` with a float -- confirm intent.
        chem = self.dataset[idx]
        offset = self.offsets[idx]
        return chem + offset

    def plot_drift(self):
        """
        Plot the drift function, a band around it, and the sampled offsets.

        The solid blue line is the true drift function against base RT, the dashed
        blue lines lie 1.95 noise standard deviations above and below it, and the
        red dots are the offsets of the individual chemicals.

        Returns: None

        """
        by_rt = np.argsort(self.dataset_rts)
        plt.figure(figsize=(12, 8))
        sorted_rts = self.dataset_rts[by_rt]
        sorted_drift = self.true_drift_function[by_rt]
        # NOTE(review): 1.95 is presumably meant as the ~95% normal quantile (1.96) -- confirm.
        band = 1.95 * self.noise_sd
        plt.plot(sorted_rts, sorted_drift, "b")
        plt.plot(sorted_rts, sorted_drift + band, "b--")
        plt.plot(sorted_rts, sorted_drift - band, "b--")
        plt.plot(self.dataset_rts, self.offsets, "ro")
        plt.ylabel("Drift Amount")
        plt.xlabel("Base RT")
        plt.show()

    def plot_drift_distribution(self):
        """
        Plot 100 freshly drawn drift functions against base RT.

        Each curve comes from a new call to `_get_offsets()`; only the drift
        function is plotted, the noisy offsets of each draw are discarded.

        Returns: None

        """
        by_rt = np.argsort(self.dataset_rts)
        plt.figure(figsize=(12, 8))
        sorted_rts = self.dataset_rts[by_rt]
        for _draw in range(100):
            _noisy_offsets, drift = self._get_offsets()
            plt.plot(sorted_rts, drift[by_rt])
        plt.ylabel("Drift Amount")
        plt.xlabel("Base RT")
        plt.show()

__init__(dataset, noise_sd)

Create a column object

Parameters:
  • dataset

    the set of Chemicals that passes through this column

  • noise_sd

    the noise standard deviation sampled from a normal distribution

Source code in vimms/Column.py
17
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, dataset, noise_sd):
    """
    Create a column object.

    Args:
        dataset: the set of Chemicals that passes through this column
        noise_sd: standard deviation of the normal distribution the RT noise is sampled from
    """
    self.dataset = dataset
    self.noise_sd = noise_sd
    # Cache the RT and apex RT of every chemical as arrays, in dataset order.
    self.dataset_rts = np.array([chem.rt for chem in self.dataset])
    self.dataset_apex_rts = np.array([chem.get_apex_rt() for chem in self.dataset])
    # Offsets are drawn once at construction time and reused afterwards.
    self.offsets, self.true_drift_function = self._get_offsets()

get_chemical(idx)

Looks up the chemical at the specified index and adds its offset to it. Args: idx: the index into the dataset

Returns: the result of adding the offset at that index to the chemical at that index

Source code in vimms/Column.py
55
56
57
58
59
60
61
62
63
64
def get_chemical(self, idx):
    """
    Look up the chemical at the given index and add its offset to it.

    Args:
        idx: the index into the dataset (and into the offsets)

    Returns: the result of `dataset[idx] + offsets[idx]`

    """
    # NOTE(review): this adds a number to the dataset entry itself rather than to
    # its rt, so it relies on the entry supporting `+` with a float -- confirm intent.
    chem = self.dataset[idx]
    offset = self.offsets[idx]
    return chem + offset

get_dataset()

Gets a modified dataset with column (RT) noise applied. Returns: a new list of Chemicals whose RTs have been modified by the column

Source code in vimms/Column.py
42
43
44
45
46
47
48
49
50
51
52
53
def get_dataset(self):
    """
    Build a copy of the dataset with this column's RT offsets applied.

    The stored chemicals are left untouched: each one is deep-copied before
    its `rt` is shifted.

    Returns: a new list of Chemicals whose RTs have been shifted by the column offsets

    """

    def _apply_offset(position, chem):
        shifted = copy.deepcopy(chem)
        shifted.rt += self.offsets[position]
        return shifted

    return [_apply_offset(position, chem) for position, chem in enumerate(self.dataset)]

plot_drift()

Plot the drift

Returns: None

Source code in vimms/Column.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def plot_drift(self):
    """
    Plot the drift function, a band around it, and the sampled offsets.

    The solid blue line is the true drift function against base RT, the dashed
    blue lines lie 1.95 noise standard deviations above and below it, and the
    red dots are the offsets of the individual chemicals.

    Returns: None

    """
    by_rt = np.argsort(self.dataset_rts)
    plt.figure(figsize=(12, 8))
    sorted_rts = self.dataset_rts[by_rt]
    sorted_drift = self.true_drift_function[by_rt]
    # NOTE(review): 1.95 is presumably meant as the ~95% normal quantile (1.96) -- confirm.
    band = 1.95 * self.noise_sd
    plt.plot(sorted_rts, sorted_drift, "b")
    plt.plot(sorted_rts, sorted_drift + band, "b--")
    plt.plot(sorted_rts, sorted_drift - band, "b--")
    plt.plot(self.dataset_rts, self.offsets, "ro")
    plt.ylabel("Drift Amount")
    plt.xlabel("Base RT")
    plt.show()

plot_drift_distribution()

Plot drift distribution

Returns: None

Source code in vimms/Column.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def plot_drift_distribution(self):
    """
    Plot 100 freshly drawn drift functions against base RT.

    Each curve comes from a new call to `_get_offsets()`; only the drift
    function is plotted, the noisy offsets of each draw are discarded.

    Returns: None

    """
    by_rt = np.argsort(self.dataset_rts)
    plt.figure(figsize=(12, 8))
    sorted_rts = self.dataset_rts[by_rt]
    for _draw in range(100):
        _noisy_offsets, drift = self._get_offsets()
        plt.plot(sorted_rts, drift[by_rt])
    plt.ylabel("Drift Amount")
    plt.xlabel("Base RT")
    plt.show()

GaussianProcessColumn

Bases: Column

A gaussian-process based column

Source code in vimms/Column.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
class GaussianProcessColumn(Column):
    """
    A gaussian-process based column: the RT drift is drawn from a multivariate
    normal whose mean is a random linear function of the apex RTs and whose
    covariance is an RBF kernel over the apex RTs.
    """

    def __init__(self, dataset, noise_sd, rbf_params, intercept_params, linear_params):
        """
        Create a gaussian process drift column.

        Args:
            dataset: the set of Chemicals that passes through this column
            noise_sd: standard deviation of the independent normal noise added to each offset
            rbf_params: indexable pair; [0] multiplies the RBF kernel and [1] divides the
                squared apex RT difference inside the exponential
            intercept_params: indexable pair (mean, standard deviation) of the normal
                distribution the intercept of the mean drift is drawn from
            linear_params: indexable pair (mean, standard deviation) of the normal
                distribution the slope of the mean drift is drawn from
        """
        # These must be set before the base constructor runs, because it calls
        # _get_offsets(), which reads them.
        self.rbf_params = rbf_params
        self.intercept_params = intercept_params
        self.linear_params = linear_params
        super().__init__(dataset, noise_sd)

    def _get_offsets(self):
        """
        Draw a random linear mean drift and sample offsets around it.

        A new intercept and slope are drawn on every call, so repeated calls
        give different drift functions.

        Returns: a tuple (offsets, true_offset_function), each with one entry per chemical

        """
        intercept_term = np.random.normal(self.intercept_params[0], self.intercept_params[1])
        linear_term = np.random.normal(self.linear_params[0], self.linear_params[1])
        mean = intercept_term + linear_term * self.dataset_apex_rts
        return self._draw_offset(mean)

    def _draw_offset(self, mean):
        """
        Sample a drift function from the gaussian process and add noise to it.

        Args:
            mean: the mean drift, one value per chemical

        Returns: a tuple (offsets, true_offset_function); true_offset_function is the
            multivariate normal draw and offsets is that draw plus independent normal
            noise with standard deviation `noise_sd`

        """
        apex_rts = self.dataset_apex_rts
        n_chems = len(apex_rts)
        # Pairwise differences via broadcasting: entry [n, m] is apex_rts[n] - apex_rts[m].
        # This replaces an element-by-element Python double loop over the N x N matrix.
        rt_diffs = apex_rts[:, np.newaxis] - apex_rts[np.newaxis, :]
        covariance = np.asarray(
            self.rbf_params[0] * np.exp(-(1.0 / self.rbf_params[1]) * rt_diffs**2),
            dtype=np.double,
        )
        true_offset_function = np.random.multivariate_normal(mean, covariance)
        offsets = true_offset_function + np.random.normal(0, self.noise_sd, n_chems)
        return offsets, true_offset_function

__init__(dataset, noise_sd, rbf_params, intercept_params, linear_params)

Create a gaussian process drift column

Parameters:
  • dataset
  • noise_sd
  • rbf_params
  • intercept_params
  • linear_params

Source code in vimms/Column.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def __init__(self, dataset, noise_sd, rbf_params, intercept_params, linear_params):
    """
    Create a gaussian process drift column.

    The three parameter objects are stored on the instance as given, then the
    base constructor is run with `dataset` and `noise_sd`.

    Args:
        dataset: passed on to the base constructor
        noise_sd: passed on to the base constructor
        rbf_params: stored as `self.rbf_params`
        intercept_params: stored as `self.intercept_params`
        linear_params: stored as `self.linear_params`
    """
    # Store the parameters first, then delegate to the base constructor.
    for attr_name, attr_value in (
        ("rbf_params", rbf_params),
        ("intercept_params", intercept_params),
        ("linear_params", linear_params),
    ):
        setattr(self, attr_name, attr_value)
    super().__init__(dataset, noise_sd)

LinearColumn

Bases: Column

A column with linear drift in the RT

Source code in vimms/Column.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
class LinearColumn(Column):
    """
    A column whose RT drift is a linear function of the apex RT:
    drift = intercept_term + linear_term * apex_rt, plus normal noise.
    """

    def __init__(self, dataset, noise_sd, intercept_params, linear_params):
        """
        Create a linear drift column with a randomly drawn intercept and slope.

        Args:
            dataset: the set of Chemicals that passes through this column
            noise_sd: noise standard deviation
            intercept_params: indexable pair (mean, standard deviation) of the normal
                distribution the intercept term is drawn from
            linear_params: indexable pair (mean, standard deviation) of the normal
                distribution the linear term is drawn from
        """
        self.intercept_params = intercept_params
        self.linear_params = linear_params
        # Draw the intercept first and the slope second, once per column.
        intercept_mean, intercept_sd = intercept_params[0], intercept_params[1]
        self.intercept_term = np.random.normal(intercept_mean, intercept_sd)
        linear_mean, linear_sd = linear_params[0], linear_params[1]
        self.linear_term = np.random.normal(linear_mean, linear_sd)
        # The base constructor calls _get_offsets(), which needs both terms set.
        super().__init__(dataset, noise_sd)

    @staticmethod
    def from_fixed_offsets(dataset, noise_sd, intercept_term, linear_term):
        """
        Build a LinearColumn with a given intercept and slope instead of drawn ones.

        Args:
            dataset: the set of Chemicals that passes through this column
            noise_sd: noise standard deviation
            intercept_term: the intercept of the linear drift
            linear_term: the slope of the linear drift

        Returns: a LinearColumn whose offsets were recomputed from the given terms

        """
        # (0, 0) parameters are placeholders; the drawn terms are overwritten below.
        placeholder_params = (0, 0)
        column = LinearColumn(dataset, noise_sd, placeholder_params, placeholder_params)
        column.intercept_term = intercept_term
        column.linear_term = linear_term
        # Recompute the offsets now that the real terms are in place.
        column.offsets, column.true_drift_function = column._get_offsets()
        return column

    def _get_offsets(self):
        """
        Compute the linear drift at every apex RT and add normal noise to it.

        Returns: a tuple (offsets, true_offset_function), each with one entry per chemical

        """
        linear_drift = self.intercept_term + self.linear_term * self.dataset_apex_rts
        noise = np.random.normal(0, self.noise_sd, len(self.dataset))
        return linear_drift + noise, linear_drift

    def drift_fn(self, roi, injection_number):
        """
        Compute the drift implied by the linear model at an ROI's estimated apex.

        Args:
            roi: object providing `estimate_apex()`
            injection_number: not used by this implementation

        Returns: a tuple (drift, {}) where drift is the estimated apex minus the
            apex with the linear model inverted, and the second item is an empty dict

        """
        # Derivation, treating the estimated apex as f(rt):
        #   f(rt) = rt + (m * rt + c)
        #   rt + m * rt = f(rt) - c
        #   rt(1 + m) = f(rt) - c
        #   rt = (f(rt) - c) / (1 + m)
        apex = roi.estimate_apex()
        inverted_apex = (apex - self.intercept_term) / (1 + self.linear_term)
        # NOTE(review): this doesn't account for noise?
        return apex - inverted_apex, {}

__init__(dataset, noise_sd, intercept_params, linear_params)

Create a linear drift column

Parameters:
  • dataset

    the set of Chemicals that passes through this column

  • noise_sd

    noise standard deviation

  • intercept_params

    intercept parameters

  • linear_params

    linear parameters

Source code in vimms/Column.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def __init__(self, dataset, noise_sd, intercept_params, linear_params):
    """
    Create a linear drift column with a randomly drawn intercept and slope.

    Args:
        dataset: the set of Chemicals that passes through this column
        noise_sd: noise standard deviation
        intercept_params: indexable pair (mean, standard deviation) of the normal
            distribution the intercept term is drawn from
        linear_params: indexable pair (mean, standard deviation) of the normal
            distribution the linear term is drawn from
    """
    self.intercept_params = intercept_params
    self.linear_params = linear_params
    # Draw the intercept first and the slope second, once per column.
    intercept_mean, intercept_sd = intercept_params[0], intercept_params[1]
    self.intercept_term = np.random.normal(intercept_mean, intercept_sd)
    linear_mean, linear_sd = linear_params[0], linear_params[1]
    self.linear_term = np.random.normal(linear_mean, linear_sd)
    # Both terms are set before delegating to the base constructor.
    super().__init__(dataset, noise_sd)

drift_fn(roi, injection_number)

Drift function

Parameters:
  • roi
  • injection_number

Returns: a tuple of the drift computed at the ROI's estimated apex and an empty dict

Source code in vimms/Column.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def drift_fn(self, roi, injection_number):
    """
    Compute the drift implied by the linear model at an ROI's estimated apex.

    Args:
        roi: object providing `estimate_apex()`
        injection_number: not used by this implementation

    Returns: a tuple (drift, {}) where drift is the estimated apex minus the
        apex with the linear model inverted, and the second item is an empty dict

    """
    # Derivation, treating the estimated apex as f(rt):
    #   f(rt) = rt + (m * rt + c)
    #   rt + m * rt = f(rt) - c
    #   rt(1 + m) = f(rt) - c
    #   rt = (f(rt) - c) / (1 + m)
    apex = roi.estimate_apex()
    inverted_apex = (apex - self.intercept_term) / (1 + self.linear_term)
    # NOTE(review): this doesn't account for noise?
    return apex - inverted_apex, {}

from_fixed_offsets(dataset, noise_sd, intercept_term, linear_term) staticmethod

From fixed offsets

Parameters:
  • dataset
  • noise_sd
  • intercept_term
  • linear_term

Returns: a LinearColumn whose intercept and linear terms are set to the given values

Source code in vimms/Column.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
@staticmethod
def from_fixed_offsets(dataset, noise_sd, intercept_term, linear_term):
    """
    Build a LinearColumn with a given intercept and slope instead of drawn ones.

    Args:
        dataset: the set of Chemicals that passes through this column
        noise_sd: noise standard deviation
        intercept_term: the intercept term to set on the new column
        linear_term: the linear term to set on the new column

    Returns: a LinearColumn whose offsets were recomputed after setting the given terms

    """
    # (0, 0) parameters are placeholders; the terms set by the constructor are overwritten below.
    placeholder_params = (0, 0)
    column = LinearColumn(dataset, noise_sd, placeholder_params, placeholder_params)
    column.intercept_term = intercept_term
    column.linear_term = linear_term
    # Recompute the offsets now that the given terms are in place.
    column.offsets, column.true_drift_function = column._get_offsets()
    return column