spc_spectra.spc

Classes for reading data from Thermo Grams *.SPC files

  1"""
  2Classes for reading data from Thermo Grams *.SPC files
  3"""
  4# --- Imports
  5
  6# Standard library
  7from __future__ import division, absolute_import, unicode_literals, print_function
  8import struct
  9import logging
 10
 11# External packages
 12import numpy as np
 13
 14# Local package
 15from .subfile import subFile, subFileOld, read_subheader
 16from .utils import flag_bits
 17
 18
 19# --- Constants
 20
 21_LOGGER = logging.getLogger(__name__)
 22
 23
 24# --- Classes
 25
 26class File:
 27    """
 28    Start loading the data from a .SPC spectral file using data from the header. Store all
 29    the attributes of a spectral file:
 30
 31    Data
 32    ----
 33    content: Full raw data
 34    sub[i]: sub file object for each subfileFor each subfile
 35        sub[i].y: y data for each subfile
 36    x: x-data, global, or for the first subheader
 37
 38    Examples
 39    --------
 40    >>> spc_file = File('/path/to/ftir.spc')
 41    """
 42
 43    # Format strings for various parts of the file
 44    # calculate size of strings using `struct.calcsize(string)`
 45    head_str = "<cccciddicccci9s9sh32s130s30siicchf48sfifc187s"
 46    old_head_str = "<cchfffcchcccc8shh28s130s30s32s"
 47    logstc_str = "<iiiii44s"
 48
 49    # byte positon of various parts of the file
 50    head_siz = 512
 51    old_head_siz = 256
 52    subhead_siz = 32
 53    log_siz = 64
 54
 55    subhead1_pos = head_siz + subhead_siz
 56
 57    # ------------------------------------------------------------------------
 58    # CONSTRUCTOR
 59    # ------------------------------------------------------------------------
 60
 61    def __init__(self, filename):
 62        # load entire into memory temporarly
 63        with open(filename, "rb") as fin:
 64            content = fin.read()
 65            # print "Read raw data"
 66
 67        self.length = len(content)
 68        # extract first two bytes to determine file type version
 69        self.ftflg, self.fversn = struct.unpack('<cc'.encode('utf8'), content[:2])
 70        # --------------------------------------------
 71        # NEW FORMAT (LSB)
 72        # --------------------------------------------
 73        if self.fversn == b'\x4b':
 74            # format: new LSB 1st
 75            # -------------
 76            # unpack header
 77            # -------------
 78            # use little-endian format with standard sizes
 79            # use naming scheme in SPC.H header file
 80            self.ftflg, \
 81                self.fversn, \
 82                self.fexper, \
 83                self.fexp, \
 84                self.fnpts, \
 85                self.ffirst, \
 86                self.flast, \
 87                self.fnsub, \
 88                self.fxtype, \
 89                self.fytype, \
 90                self.fztype, \
 91                self.fpost, \
 92                self.fdate, \
 93                self.fres, \
 94                self.fsource, \
 95                self.fpeakpt, \
 96                self.fspare, \
 97                self.fcmnt, \
 98                self.fcatxt, \
 99                self.flogoff, \
100                self.fmods, \
101                self.fprocs, \
102                self.flevel, \
103                self.fsampin, \
104                self.ffactor, \
105                self.fmethod, \
106                self.fzinc, \
107                self.fwplanes, \
108                self.fwinc, \
109                self.fwtype, \
110                self.freserv \
111                = struct.unpack(self.head_str.encode('utf8'), content[:self.head_siz])
112
113            # Flag bits
114            self.tsprec, \
115                self.tcgram, \
116                self.tmulti, \
117                self.trandm, \
118                self.tordrd, \
119                self.talabs, \
120                self.txyxys, \
121                self.txvals = flag_bits(self.ftflg)[::-1]
122
123            # fix data types if necessary
124            self.fnpts = int(self.fnpts)  # of points should be int
125            self.fexp = ord(self.fexp)
126
127            self.ffirst = float(self.ffirst)
128            self.flast = float(self.flast)
129
130            self.flogoff = int(self.flogoff)  # byte; should be int
131
132            self.fxtype = ord(self.fxtype)
133            self.fytype = ord(self.fytype)
134            self.fztype = ord(self.fztype)
135
136            self.fexper = ord(self.fexper)
137            self.fcmnt = str(self.fcmnt)
138
139            # Convert date time to appropriate format
140            d = self.fdate
141            self.year = d >> 20
142            self.month = (d >> 16) % (2**4)
143            self.day = (d >> 11) % (2**5)
144            self.hour = (d >> 6) % (2**5)
145            self.minute = d % (2**6)
146
147            # null terminated string, replace null characters with spaces
148            # split and join to remove multiple spaces
149            try:
150                self.cmnt = ' '.join((self.fcmnt.replace('\x00', ' ')).split())
151            except Exception:
152                self.cmnt = self.fcmnt
153
154            # figure out type of file
155            if self.fnsub > 1:
156                self.dat_multi = True
157
158            if self.txyxys:
159                # x values are given
160                self.dat_fmt = '-xy'
161            elif self.txvals:
162                # only one subfile, which contains the x data
163                self.dat_fmt = 'x-y'
164            else:
165                # no x values are given, but they can be generated
166                self.dat_fmt = 'gx-y'
167
168            _LOGGER.info('{}({})'.format(self.dat_fmt, self.fnsub))
169
170            sub_pos = self.head_siz
171
172            if not self.txyxys:
173                # txyxys don't have global x data
174                if self.txvals:
175                    # if global x data is given
176                    x_dat_pos = self.head_siz
177                    x_dat_end = self.head_siz + (4 * self.fnpts)
178                    self.x = np.array(
179                        [struct.unpack_from(
180                            'f', content[x_dat_pos:x_dat_end], 4 * i)[0]
181                            for i in range(0, self.fnpts)])
182                    sub_pos = x_dat_end
183                else:
184                    # otherwise generate them
185                    self.x = np.linspace(self.ffirst, self.flast, num=self.fnpts)
186
187            # make a list of subfiles
188            self.sub = []
189
190            # if subfile directory is given
191            if self.dat_fmt == '-xy' and self.fnpts > 0:
192                self.directory = True
193                # loop over entries in directory
194                for i in range(0, self.fnsub):
195                    ssfposn, ssfsize, ssftime = struct.unpack(
196                        '<iif'.encode('utf8'), content[self.fnpts + (i * 12):self.fnpts + ((i + 1) * 12)])
197                    # add sufile, load defaults for npts and exp
198                    self.sub.append(subFile(content[ssfposn:ssfposn + ssfsize], 0, 0, True, self.tsprec, self.tmulti))
199
200            else:
201                # don't have directory, for each subfile
202                for i in range(self.fnsub):
203                    # figure out its size
204                    if self.txyxys:
205                        # use points in subfile
206                        subhead_lst = read_subheader(content[sub_pos:(sub_pos + 32)])
207                        pts = subhead_lst[6]
208                        # 4 bytes each for x and y, and 32 for subheader
209                        dat_siz = (8 * pts) + 32
210                    else:
211                        # use global points
212                        pts = self.fnpts
213                        dat_siz = (4 * pts) + 32
214
215                    sub_end = sub_pos + dat_siz
216                    # read into object, add to list
217                    self.sub.append(subFile(content[sub_pos:sub_end],
218                                            self.fnpts, self.fexp, self.txyxys, self.tsprec, self.tmulti))
219                    # update positions
220                    sub_pos = sub_end
221
222            # if log data exists
223            # flog offset to log data offset not zero (bytes)
224            if self.flogoff:
225                log_head_end = self.flogoff + self.log_siz
226                self.logsizd, \
227                    self.logsizm, \
228                    self.logtxto, \
229                    self.logbins, \
230                    self.logdsks, \
231                    self.logspar \
232                    = struct.unpack(self.logstc_str.encode('utf8'),
233                                    content[self.flogoff:log_head_end])
234                log_pos = self.flogoff + self.logtxto
235
236                log_end_pos = log_pos + self.logsizd
237
238                # line endings: get rid of any '\r' and then split on '\n'
239                self.log_content = content[log_pos:log_end_pos].replace(b'\r', b'').split(b'\n')
240
241                # split log data into dictionary based on =
242                self.log_dict = dict()
243                self.log_other = []  # put the rest into a list
244                for x in self.log_content:
245                    if x.find(b'=') >= 0:
246                        # stop it from breaking if there is more than 1 =
247                        key, value = x.split(b'=')[:2]
248                        self.log_dict[key] = value
249                    else:
250                        self.log_other.append(x)
251
252            # spacing between data
253            self.spacing = (self.flast - self.ffirst) / (self.fnpts - 1)
254
255            # call functions
256            self.set_labels()
257            self.set_exp_type()
258
259        # --------------------------------------------
260        # NEW FORMAT (MSB)
261        # --------------------------------------------
262        elif self.fversn == b'\x4c':
263            # new MSB 1st
264            print("New MSB 1st, yet to be implemented")
265            pass  # To be implemented
266
267        # --------------------------------------------
268        # OLD FORMAT
269        # --------------------------------------------
270        elif self.fversn == b'\x4d':
271            # old format
272            # oxtype -> fxtype
273            # oytype -> fytype
274            self.oftflgs, \
275                self.oversn, \
276                self.oexp, \
277                self.onpts, \
278                self.ofirst, \
279                self.olast, \
280                self.fxtype, \
281                self.fytype, \
282                self.oyear, \
283                self.omonth, \
284                self.oday, \
285                self.ohour, \
286                self.ominute, \
287                self.ores, \
288                self.opeakpt, \
289                self.onscans, \
290                self.ospare, \
291                self.ocmnt, \
292                self.ocatxt, \
293                self.osubh1 = struct.unpack(self.old_head_str.encode('utf8'),
294                                            content[:self.old_head_siz])
295
296            # Flag bits (assuming same)
297            self.tsprec, \
298                self.tcgram, \
299                self.tmulti, \
300                self.trandm, \
301                self.tordrd, \
302                self.talabs, \
303                self.txyxys, \
304                self.txvals = flag_bits(self.oftflgs)[::-1]
305
306            # fix data types
307            self.oexp = int(self.oexp)
308            self.onpts = int(self.onpts)  # can't have floating num of pts
309            self.ofirst = float(self.ofirst)
310            self.olast = float(self.olast)
311
312            # Date information
313            # !! to fix !!
314            # Year collected (0=no date/time) - MSB 4 bits are Z type
315
316            # extracted as characters, using ord
317            self.omonth = ord(self.omonth)
318            self.oday = ord(self.oday)
319            self.ohour = ord(self.ohour)
320            self.ominute = ord(self.ominute)
321
322            # number of scans (? subfiles sometimes ?)
323            self.onscans = int(self.onscans)
324
325            # null terminated strings
326            self.ores = self.ores.split(b'\x00')[0]
327            self.ocmnt = self.ocmnt.split(b'\x00')[0]
328
329            # can it have separate x values ?
330            self.x = np.linspace(self.ofirst, self.olast, num=self.onpts)
331
332            # make a list of subfiles
333            self.sub = []
334
335            # already have subheader from main header, retrace steps
336            sub_pos = self.old_head_siz - self.subhead_siz
337
338            # for each subfile
339            # in the old format we don't know how many subfiles to expect,
340            # just looping till we run out
341            i = 0
342            while True:
343                try:
344                    # read in subheader
345                    subhead_lst = read_subheader(content[sub_pos:sub_pos + self.subhead_siz])
346
347                    if subhead_lst[6] > 0:
348                        # default to subfile points, unless it is zero
349                        pts = subhead_lst[6]
350                    else:
351                        pts = self.onpts
352
353                    # figure out size of subheader
354                    dat_siz = (4 * pts)
355                    sub_end = sub_pos + self.subhead_siz + dat_siz
356
357                    # read into object, add to list
358                    # send it pts since we have already figured that out
359                    self.sub.append(subFileOld(
360                        content[sub_pos:sub_end], pts, self.oexp, self.txyxys))
361                    # update next subfile postion, and index
362                    sub_pos = sub_end
363
364                    i += 1
365                except Exception:
366                    # zero indexed, set the total number of subfile
367                    self.fnsub = i + 1
368                    break
369
370            # assuming it can't have separate x values
371            self.dat_fmt = 'gx-y'
372            _LOGGER.info('{}({})'.format(self.dat_fmt, self.fnsub))
373
374            self.fxtype = ord(self.fxtype)
375            self.fytype = ord(self.fytype)
376            # need to find from year apparently
377            self.fztype = 0
378            self.set_labels()
379
380        # --------------------------------------------
381        # SHIMADZU
382        # --------------------------------------------
383        elif self.fversn == b'\xcf':
384            print("Highly experimental format, may not work ")
385            raw_data = content[10240:]  # data starts here (maybe every time)
386            # spacing between y and x data is atleast 0 bytes
387            s_32 = chr(int('0', 2)) * 32
388            s_8 = chr(int('0', 2)) * 8  # zero double
389            dat_len = raw_data.find(s_32)
390            for i in range(dat_len, len(raw_data), 8):
391                # find first non zero double
392                if raw_data[i:i + 8] != s_8:
393                    break
394            dat_siz = int(dat_len / 8)
395            self.y = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[:dat_len])
396            self.x = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[i:i + dat_len])
397
398        else:
399            print("File type %s not supported yet. Please add issue. "
400                  % hex(ord(self.fversn)))
401            self.content = content
402
403    # ------------------------------------------------------------------------
404    # Process other data
405    # ------------------------------------------------------------------------
406
407    def set_labels(self):
408        """
409        Set the x, y, z axis labels using various information in file content.
410        """
411
412        # --------------------------
413        # units for x,z,w axes
414        # --------------------------
415        fxtype_op = ["Arbitrary",
416                     "Wavenumber (cm-1)",
417                     "Micrometers (um)",
418                     "Nanometers (nm)",
419                     "Seconds ",
420                     "Minutes", "Hertz (Hz)",
421                     "Kilohertz (KHz)",
422                     "Megahertz (MHz) ",
423                     "Mass (M/z)",
424                     "Parts per million (PPM)",
425                     "Days",
426                     "Years",
427                     "Raman Shift (cm-1)",
428                     "eV",
429                     "XYZ text labels in fcatxt (old 0x4D version only)",
430                     "Diode Number",
431                     "Channel",
432                     "Degrees",
433                     "Temperature (F)",
434                     "Temperature (C)",
435                     "Temperature (K)",
436                     "Data Points",
437                     "Milliseconds (mSec)",
438                     "Microseconds (uSec) ",
439                     "Nanoseconds (nSec)",
440                     "Gigahertz (GHz)",
441                     "Centimeters (cm)",
442                     "Meters (m)",
443                     "Millimeters (mm)",
444                     "Hours"]
445
446        if self.fxtype < 30:
447            self.xlabel = fxtype_op[self.fxtype]
448        else:
449            self.xlabel = "Unknown"
450
451        if self.fztype < 30:
452            self.zlabel = fxtype_op[self.fztype]
453        else:
454            self.zlabel = "Unknown"
455
456        # --------------------------
457        # units y-axis
458        # --------------------------
459
460        fytype_op = ["Arbitrary Intensity",
461                     "Interferogram",
462                     "Absorbance",
463                     "Kubelka-Munk",
464                     "Counts",
465                     "Volts",
466                     "Degrees",
467                     "Milliamps",
468                     "Millimeters",
469                     "Millivolts",
470                     "Log(1/R)",
471                     "Percent",
472                     "Intensity",
473                     "Relative Intensity",
474                     "Energy",
475                     "",
476                     "Decibel",
477                     "",
478                     "",
479                     "Temperature (F)",
480                     "Temperature (C)",
481                     "Temperature (K)",
482                     "Index of Refraction [N]",
483                     "Extinction Coeff. [K]",
484                     "Real",
485                     "Imaginary",
486                     "Complex"]
487
488        fytype_op2 = ["Transmission",
489                      "Reflectance",
490                      "Arbitrary or Single Beam with Valley Peaks",
491                      "Emission"]
492
493        if self.fytype < 27:
494            self.ylabel = fytype_op[self.fytype]
495        elif self.fytype > 127 and self.fytype < 132:
496            self.ylabel = fytype_op2[self.fytype - 128]
497        else:
498            self.ylabel = "Unknown"
499
500        # --------------------------
501        # check if labels are included as text
502        # --------------------------
503
504        # split it based on 00 string
505        # format x, y, z
506        if self.talabs:
507            ll = self.fcatxt.split(b'\x00')
508            if len(ll) > 2:
509                # make sure there are enough items to extract from
510                xl, yl, zl = ll[:3]
511
512                # overwrite only if non zero
513                if len(xl) > 0:
514                    self.xlabel = xl
515                if len(yl) > 0:
516                    self.ylabel = yl
517                if len(zl) > 0:
518                    self.zlabel = zl
519
520    def set_exp_type(self):
521        """
522        Set the experiment type.
523        """
524
525        fexper_op = ["General SPC",
526                     "Gas Chromatogram",
527                     "General Chromatogram",
528                     "HPLC Chromatogram",
529                     "FT-IR, FT-NIR, FT-Raman Spectrum or Igram",
530                     "NIR Spectrum",
531                     "UV-VIS Spectrum",
532                     "X-ray Diffraction Spectrum",
533                     "Mass Spectrum ",
534                     "NMR Spectrum or FID",
535                     "Raman Spectrum",
536                     "Fluorescence Spectrum",
537                     "Atomic Spectrum",
538                     "Chromatography Diode Array Spectra"]
539
540        self.exp_type = fexper_op[self.fexper]
541
542    # ------------------------------------------------------------------------
543    # output
544    # ------------------------------------------------------------------------
545    def data_txt(self, delimiter='\t', newline='\n'):
546        r"""
547        Returns x,y column data as a string variable, can be printed to standard output or
548        fed to text file.
549
550        Arguments
551        ---------
552        delimiter: chr (default='\t')
553            delimiter character for column separation
554        newline: chr (default='\n')
555            newline character, may want to use '\r\n' for Windows based output
556
557        Example
558        -------
559        >>> spc_file = File('/path/to/ftir.spc')
560        >>> spc_file.data_txt(newline='\r\n')
561        """
562
563        dat = ''
564        if self.fnsub == 1:
565            if self.dat_fmt.endswith('-xy'):
566                x = self.sub[0].x
567            else:
568                x = self.x
569            y = self.sub[0].y
570
571            for x1, y1 in zip(x, y):
572                dat += '{}{}{}{}'.format(x1, delimiter, y1, newline)
573        else:
574            if not self.dat_fmt.endswith('-xy'):
575                # does not have separate x data
576                for i in range(len(self.x)):
577                    dat += '{}'.format(self.x[i])
578                    for s in self.sub:
579                        dat += '{}{}'.format(delimiter, s.y[i])
580                    dat += newline
581            else:
582                # txyxy format, return one long xy file with subfiles
583                # separated by blank lines
584                for i in self.sub:
585                    for x1, y1 in zip(i.x, i.y):
586                        dat += '{}{}{}{}'.format(x1, delimiter, y1, newline)
587                    dat += newline
588        return dat
589
590    def write_file(self, path, delimiter='\t', newline='\n'):
591        """
592        Output x, y data to tab-separated text file.
593
594        Arguments
595        ---------
596        path: str
597            full path to output file including extension
598        delimiter: chr (default='\t')
599            delimiter character for column separation
600        newline: chr (default='\n')
601            newline character, may want to use '\r\n' for Windows based output
602
603        Example
604        -------
605        >>> spc_file = File('/path/to/ftir.spc')
606        >>> spc_file.writefile('/Users/home/output.txt', delimiter=',')
607        """
608        with open(path, 'w') as f:
609            f.write(self.data_txt(delimiter, newline))
610
611    def print_metadata(self):
612        """
613        Print out select metadata.
614        """
615        print("Scan: ", self.log_dict['Comment'], "\n",
616              float(self.log_dict['Start']), "to ",
617              float(self.log_dict['End']), "; ",
618              float(self.log_dict['Increment']), "cm-1;",
619              float(self.log_dict['Integration Time']), "s integration time")
620
621    def plot(self):
622        """
623        Plot data and return figure object.
624
625        Requires matplotlib installed
626
627        Example
628        -------
629        >>> spc_file = File('/path/to/ftir.spc')
630        >>> spc_file.plot()
631        """
632        import matplotlib.pyplot as plt
633        if self.dat_fmt.endswith('-xy'):
634            for s in self.sub:
635                plt.plot(s.x, s.y)
636        else:
637            x = self.x
638            for s in self.sub:
639                plt.plot(x, s.y)
640        plt.xlabel(self.xlabel)
641        plt.ylabel(self.ylabel)
642        return plt.gcf()
643
644    def debug_info(self):
645        """
646        Print debugging information extracted from flags and header information.
647
648        Example
649        -------
650        >>> spc_file = File('/path/to/ftir.spc')
651        >>> spc_file.debug_info()
652        """
653        print("\nDEBUG INFO\nFlags:\n")
654        # Flag bits
655        if self.tsprec:
656            print("16-bit y data")
657        if self.tcgram:
658            print("enable fexper")
659        if self.tmulti:
660            print("multiple traces")
661        if self.trandm:
662            print("arb time (z) values")
663        if self.tordrd:
664            print("ordered but uneven subtimes")
665        if self.talabs:
666            print("use fcatxt axis not fxtype")
667        if self.txyxys:
668            print("each subfile has own x's")
669        if self.txvals:
670            print("floating x-value array preceeds y's")
671
672        print('----\n')
673        # spc format version
674        if self.fversn == chr(0x4b):
675            self.pr_versn = "new LSB 1st"
676        elif self.fversn == chr(0x4c):
677            self.pr_versn = "new MSB 1st"
678        elif self.fversn == chr(0x4d):
679            self.pr_versn = "old format"
680        else:
681            self.pr_versn = "unknown version"
682
683        print("Version:", self.pr_versn)
684
685        # subfiles
686        if self.fnsub == 1:
687            print("Single file only")
688        else:
689            print("Multiple subfiles:", self.fnsub)
690
691        # multiple y values
692        if self.tmulti:
693            print("Multiple y-values")
694        else:
695            print("Single set of y-values")
696
697        # print "There are ", self.fnpts, \
698        #    " points between ", self.ffirst, \
699        #    " and ", self.flast, \
700        #    " in steps of ", self.pr_spacing
class File:
class File:
    """
    Load the data from a .SPC spectral file using the information in its header and
    store the attributes of the spectral file on the instance.

    Data
    ----
    content: full raw data (only stored when the format version is not recognised)
    sub[i]: subfile object for each subfile
        sub[i].y: y data for each subfile
    x: x-data, global, or for the first subheader

    Examples
    --------
    >>> spc_file = File('/path/to/ftir.spc')
    """

    # Format strings for various parts of the file
    # calculate size of strings using `struct.calcsize(string)`
    # new-format main header, unpacked from the first `head_siz` bytes
    head_str = "<cccciddicccci9s9sh32s130s30siicchf48sfifc187s"
    # old-format main header, unpacked from the first `old_head_siz` bytes
    old_head_str = "<cchfffcchcccc8shh28s130s30s32s"
    # log block header, unpacked from the `log_siz` bytes starting at `flogoff`
    logstc_str = "<iiiii44s"

    # byte sizes of various parts of the file
    head_siz = 512
    old_head_siz = 256
    subhead_siz = 32
    log_siz = 64

    # NOTE(review): not referenced by the methods visible in this class;
    # presumably the offset just past the first subheader - confirm before use
    subhead1_pos = head_siz + subhead_siz
 57
 58    # ------------------------------------------------------------------------
 59    # CONSTRUCTOR
 60    # ------------------------------------------------------------------------
 61
 62    def __init__(self, filename):
 63        # load entire into memory temporarly
 64        with open(filename, "rb") as fin:
 65            content = fin.read()
 66            # print "Read raw data"
 67
 68        self.length = len(content)
 69        # extract first two bytes to determine file type version
 70        self.ftflg, self.fversn = struct.unpack('<cc'.encode('utf8'), content[:2])
 71        # --------------------------------------------
 72        # NEW FORMAT (LSB)
 73        # --------------------------------------------
 74        if self.fversn == b'\x4b':
 75            # format: new LSB 1st
 76            # -------------
 77            # unpack header
 78            # -------------
 79            # use little-endian format with standard sizes
 80            # use naming scheme in SPC.H header file
 81            self.ftflg, \
 82                self.fversn, \
 83                self.fexper, \
 84                self.fexp, \
 85                self.fnpts, \
 86                self.ffirst, \
 87                self.flast, \
 88                self.fnsub, \
 89                self.fxtype, \
 90                self.fytype, \
 91                self.fztype, \
 92                self.fpost, \
 93                self.fdate, \
 94                self.fres, \
 95                self.fsource, \
 96                self.fpeakpt, \
 97                self.fspare, \
 98                self.fcmnt, \
 99                self.fcatxt, \
100                self.flogoff, \
101                self.fmods, \
102                self.fprocs, \
103                self.flevel, \
104                self.fsampin, \
105                self.ffactor, \
106                self.fmethod, \
107                self.fzinc, \
108                self.fwplanes, \
109                self.fwinc, \
110                self.fwtype, \
111                self.freserv \
112                = struct.unpack(self.head_str.encode('utf8'), content[:self.head_siz])
113
114            # Flag bits
115            self.tsprec, \
116                self.tcgram, \
117                self.tmulti, \
118                self.trandm, \
119                self.tordrd, \
120                self.talabs, \
121                self.txyxys, \
122                self.txvals = flag_bits(self.ftflg)[::-1]
123
124            # fix data types if necessary
125            self.fnpts = int(self.fnpts)  # of points should be int
126            self.fexp = ord(self.fexp)
127
128            self.ffirst = float(self.ffirst)
129            self.flast = float(self.flast)
130
131            self.flogoff = int(self.flogoff)  # byte; should be int
132
133            self.fxtype = ord(self.fxtype)
134            self.fytype = ord(self.fytype)
135            self.fztype = ord(self.fztype)
136
137            self.fexper = ord(self.fexper)
138            self.fcmnt = str(self.fcmnt)
139
140            # Convert date time to appropriate format
141            d = self.fdate
142            self.year = d >> 20
143            self.month = (d >> 16) % (2**4)
144            self.day = (d >> 11) % (2**5)
145            self.hour = (d >> 6) % (2**5)
146            self.minute = d % (2**6)
147
148            # null terminated string, replace null characters with spaces
149            # split and join to remove multiple spaces
150            try:
151                self.cmnt = ' '.join((self.fcmnt.replace('\x00', ' ')).split())
152            except Exception:
153                self.cmnt = self.fcmnt
154
155            # figure out type of file
156            if self.fnsub > 1:
157                self.dat_multi = True
158
159            if self.txyxys:
160                # x values are given
161                self.dat_fmt = '-xy'
162            elif self.txvals:
163                # only one subfile, which contains the x data
164                self.dat_fmt = 'x-y'
165            else:
166                # no x values are given, but they can be generated
167                self.dat_fmt = 'gx-y'
168
169            _LOGGER.info('{}({})'.format(self.dat_fmt, self.fnsub))
170
171            sub_pos = self.head_siz
172
173            if not self.txyxys:
174                # txyxys don't have global x data
175                if self.txvals:
176                    # if global x data is given
177                    x_dat_pos = self.head_siz
178                    x_dat_end = self.head_siz + (4 * self.fnpts)
179                    self.x = np.array(
180                        [struct.unpack_from(
181                            'f', content[x_dat_pos:x_dat_end], 4 * i)[0]
182                            for i in range(0, self.fnpts)])
183                    sub_pos = x_dat_end
184                else:
185                    # otherwise generate them
186                    self.x = np.linspace(self.ffirst, self.flast, num=self.fnpts)
187
188            # make a list of subfiles
189            self.sub = []
190
191            # if subfile directory is given
192            if self.dat_fmt == '-xy' and self.fnpts > 0:
193                self.directory = True
194                # loop over entries in directory
195                for i in range(0, self.fnsub):
196                    ssfposn, ssfsize, ssftime = struct.unpack(
197                        '<iif'.encode('utf8'), content[self.fnpts + (i * 12):self.fnpts + ((i + 1) * 12)])
198                    # add sufile, load defaults for npts and exp
199                    self.sub.append(subFile(content[ssfposn:ssfposn + ssfsize], 0, 0, True, self.tsprec, self.tmulti))
200
201            else:
202                # don't have directory, for each subfile
203                for i in range(self.fnsub):
204                    # figure out its size
205                    if self.txyxys:
206                        # use points in subfile
207                        subhead_lst = read_subheader(content[sub_pos:(sub_pos + 32)])
208                        pts = subhead_lst[6]
209                        # 4 bytes each for x and y, and 32 for subheader
210                        dat_siz = (8 * pts) + 32
211                    else:
212                        # use global points
213                        pts = self.fnpts
214                        dat_siz = (4 * pts) + 32
215
216                    sub_end = sub_pos + dat_siz
217                    # read into object, add to list
218                    self.sub.append(subFile(content[sub_pos:sub_end],
219                                            self.fnpts, self.fexp, self.txyxys, self.tsprec, self.tmulti))
220                    # update positions
221                    sub_pos = sub_end
222
223            # if log data exists
224            # flog offset to log data offset not zero (bytes)
225            if self.flogoff:
226                log_head_end = self.flogoff + self.log_siz
227                self.logsizd, \
228                    self.logsizm, \
229                    self.logtxto, \
230                    self.logbins, \
231                    self.logdsks, \
232                    self.logspar \
233                    = struct.unpack(self.logstc_str.encode('utf8'),
234                                    content[self.flogoff:log_head_end])
235                log_pos = self.flogoff + self.logtxto
236
237                log_end_pos = log_pos + self.logsizd
238
239                # line endings: get rid of any '\r' and then split on '\n'
240                self.log_content = content[log_pos:log_end_pos].replace(b'\r', b'').split(b'\n')
241
242                # split log data into dictionary based on =
243                self.log_dict = dict()
244                self.log_other = []  # put the rest into a list
245                for x in self.log_content:
246                    if x.find(b'=') >= 0:
247                        # stop it from breaking if there is more than 1 =
248                        key, value = x.split(b'=')[:2]
249                        self.log_dict[key] = value
250                    else:
251                        self.log_other.append(x)
252
253            # spacing between data
254            self.spacing = (self.flast - self.ffirst) / (self.fnpts - 1)
255
256            # call functions
257            self.set_labels()
258            self.set_exp_type()
259
260        # --------------------------------------------
261        # NEW FORMAT (MSB)
262        # --------------------------------------------
263        elif self.fversn == b'\x4c':
264            # new MSB 1st
265            print("New MSB 1st, yet to be implemented")
266            pass  # To be implemented
267
268        # --------------------------------------------
269        # OLD FORMAT
270        # --------------------------------------------
271        elif self.fversn == b'\x4d':
272            # old format
273            # oxtype -> fxtype
274            # oytype -> fytype
275            self.oftflgs, \
276                self.oversn, \
277                self.oexp, \
278                self.onpts, \
279                self.ofirst, \
280                self.olast, \
281                self.fxtype, \
282                self.fytype, \
283                self.oyear, \
284                self.omonth, \
285                self.oday, \
286                self.ohour, \
287                self.ominute, \
288                self.ores, \
289                self.opeakpt, \
290                self.onscans, \
291                self.ospare, \
292                self.ocmnt, \
293                self.ocatxt, \
294                self.osubh1 = struct.unpack(self.old_head_str.encode('utf8'),
295                                            content[:self.old_head_siz])
296
297            # Flag bits (assuming same)
298            self.tsprec, \
299                self.tcgram, \
300                self.tmulti, \
301                self.trandm, \
302                self.tordrd, \
303                self.talabs, \
304                self.txyxys, \
305                self.txvals = flag_bits(self.oftflgs)[::-1]
306
307            # fix data types
308            self.oexp = int(self.oexp)
309            self.onpts = int(self.onpts)  # can't have floating num of pts
310            self.ofirst = float(self.ofirst)
311            self.olast = float(self.olast)
312
313            # Date information
314            # !! to fix !!
315            # Year collected (0=no date/time) - MSB 4 bits are Z type
316
317            # extracted as characters, using ord
318            self.omonth = ord(self.omonth)
319            self.oday = ord(self.oday)
320            self.ohour = ord(self.ohour)
321            self.ominute = ord(self.ominute)
322
323            # number of scans (? subfiles sometimes ?)
324            self.onscans = int(self.onscans)
325
326            # null terminated strings
327            self.ores = self.ores.split(b'\x00')[0]
328            self.ocmnt = self.ocmnt.split(b'\x00')[0]
329
330            # can it have separate x values ?
331            self.x = np.linspace(self.ofirst, self.olast, num=self.onpts)
332
333            # make a list of subfiles
334            self.sub = []
335
336            # already have subheader from main header, retrace steps
337            sub_pos = self.old_head_siz - self.subhead_siz
338
339            # for each subfile
340            # in the old format we don't know how many subfiles to expect,
341            # just looping till we run out
342            i = 0
343            while True:
344                try:
345                    # read in subheader
346                    subhead_lst = read_subheader(content[sub_pos:sub_pos + self.subhead_siz])
347
348                    if subhead_lst[6] > 0:
349                        # default to subfile points, unless it is zero
350                        pts = subhead_lst[6]
351                    else:
352                        pts = self.onpts
353
354                    # figure out size of subheader
355                    dat_siz = (4 * pts)
356                    sub_end = sub_pos + self.subhead_siz + dat_siz
357
358                    # read into object, add to list
359                    # send it pts since we have already figured that out
360                    self.sub.append(subFileOld(
361                        content[sub_pos:sub_end], pts, self.oexp, self.txyxys))
362                    # update next subfile postion, and index
363                    sub_pos = sub_end
364
365                    i += 1
366                except Exception:
367                    # zero indexed, set the total number of subfile
368                    self.fnsub = i + 1
369                    break
370
371            # assuming it can't have separate x values
372            self.dat_fmt = 'gx-y'
373            _LOGGER.info('{}({})'.format(self.dat_fmt, self.fnsub))
374
375            self.fxtype = ord(self.fxtype)
376            self.fytype = ord(self.fytype)
377            # need to find from year apparently
378            self.fztype = 0
379            self.set_labels()
380
381        # --------------------------------------------
382        # SHIMADZU
383        # --------------------------------------------
384        elif self.fversn == b'\xcf':
385            print("Highly experimental format, may not work ")
386            raw_data = content[10240:]  # data starts here (maybe every time)
387            # spacing between y and x data is atleast 0 bytes
388            s_32 = chr(int('0', 2)) * 32
389            s_8 = chr(int('0', 2)) * 8  # zero double
390            dat_len = raw_data.find(s_32)
391            for i in range(dat_len, len(raw_data), 8):
392                # find first non zero double
393                if raw_data[i:i + 8] != s_8:
394                    break
395            dat_siz = int(dat_len / 8)
396            self.y = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[:dat_len])
397            self.x = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[i:i + dat_len])
398
399        else:
400            print("File type %s not supported yet. Please add issue. "
401                  % hex(ord(self.fversn)))
402            self.content = content
403
404    # ------------------------------------------------------------------------
405    # Process other data
406    # ------------------------------------------------------------------------
407
408    def set_labels(self):
409        """
410        Set the x, y, z axis labels using various information in file content.
411        """
412
413        # --------------------------
414        # units for x,z,w axes
415        # --------------------------
416        fxtype_op = ["Arbitrary",
417                     "Wavenumber (cm-1)",
418                     "Micrometers (um)",
419                     "Nanometers (nm)",
420                     "Seconds ",
421                     "Minutes", "Hertz (Hz)",
422                     "Kilohertz (KHz)",
423                     "Megahertz (MHz) ",
424                     "Mass (M/z)",
425                     "Parts per million (PPM)",
426                     "Days",
427                     "Years",
428                     "Raman Shift (cm-1)",
429                     "eV",
430                     "XYZ text labels in fcatxt (old 0x4D version only)",
431                     "Diode Number",
432                     "Channel",
433                     "Degrees",
434                     "Temperature (F)",
435                     "Temperature (C)",
436                     "Temperature (K)",
437                     "Data Points",
438                     "Milliseconds (mSec)",
439                     "Microseconds (uSec) ",
440                     "Nanoseconds (nSec)",
441                     "Gigahertz (GHz)",
442                     "Centimeters (cm)",
443                     "Meters (m)",
444                     "Millimeters (mm)",
445                     "Hours"]
446
447        if self.fxtype < 30:
448            self.xlabel = fxtype_op[self.fxtype]
449        else:
450            self.xlabel = "Unknown"
451
452        if self.fztype < 30:
453            self.zlabel = fxtype_op[self.fztype]
454        else:
455            self.zlabel = "Unknown"
456
457        # --------------------------
458        # units y-axis
459        # --------------------------
460
461        fytype_op = ["Arbitrary Intensity",
462                     "Interferogram",
463                     "Absorbance",
464                     "Kubelka-Munk",
465                     "Counts",
466                     "Volts",
467                     "Degrees",
468                     "Milliamps",
469                     "Millimeters",
470                     "Millivolts",
471                     "Log(1/R)",
472                     "Percent",
473                     "Intensity",
474                     "Relative Intensity",
475                     "Energy",
476                     "",
477                     "Decibel",
478                     "",
479                     "",
480                     "Temperature (F)",
481                     "Temperature (C)",
482                     "Temperature (K)",
483                     "Index of Refraction [N]",
484                     "Extinction Coeff. [K]",
485                     "Real",
486                     "Imaginary",
487                     "Complex"]
488
489        fytype_op2 = ["Transmission",
490                      "Reflectance",
491                      "Arbitrary or Single Beam with Valley Peaks",
492                      "Emission"]
493
494        if self.fytype < 27:
495            self.ylabel = fytype_op[self.fytype]
496        elif self.fytype > 127 and self.fytype < 132:
497            self.ylabel = fytype_op2[self.fytype - 128]
498        else:
499            self.ylabel = "Unknown"
500
501        # --------------------------
502        # check if labels are included as text
503        # --------------------------
504
505        # split it based on 00 string
506        # format x, y, z
507        if self.talabs:
508            ll = self.fcatxt.split(b'\x00')
509            if len(ll) > 2:
510                # make sure there are enough items to extract from
511                xl, yl, zl = ll[:3]
512
513                # overwrite only if non zero
514                if len(xl) > 0:
515                    self.xlabel = xl
516                if len(yl) > 0:
517                    self.ylabel = yl
518                if len(zl) > 0:
519                    self.zlabel = zl
520
521    def set_exp_type(self):
522        """
523        Set the experiment type.
524        """
525
526        fexper_op = ["General SPC",
527                     "Gas Chromatogram",
528                     "General Chromatogram",
529                     "HPLC Chromatogram",
530                     "FT-IR, FT-NIR, FT-Raman Spectrum or Igram",
531                     "NIR Spectrum",
532                     "UV-VIS Spectrum",
533                     "X-ray Diffraction Spectrum",
534                     "Mass Spectrum ",
535                     "NMR Spectrum or FID",
536                     "Raman Spectrum",
537                     "Fluorescence Spectrum",
538                     "Atomic Spectrum",
539                     "Chromatography Diode Array Spectra"]
540
541        self.exp_type = fexper_op[self.fexper]
542
543    # ------------------------------------------------------------------------
544    # output
545    # ------------------------------------------------------------------------
546    def data_txt(self, delimiter='\t', newline='\n'):
547        r"""
548        Returns x,y column data as a string variable, can be printed to standard output or
549        fed to text file.
550
551        Arguments
552        ---------
553        delimiter: chr (default='\t')
554            delimiter character for column separation
555        newline: chr (default='\n')
556            newline character, may want to use '\r\n' for Windows based output
557
558        Example
559        -------
560        >>> spc_file = File('/path/to/ftir.spc')
561        >>> spc_file.data_txt(newline='\r\n')
562        """
563
564        dat = ''
565        if self.fnsub == 1:
566            if self.dat_fmt.endswith('-xy'):
567                x = self.sub[0].x
568            else:
569                x = self.x
570            y = self.sub[0].y
571
572            for x1, y1 in zip(x, y):
573                dat += '{}{}{}{}'.format(x1, delimiter, y1, newline)
574        else:
575            if not self.dat_fmt.endswith('-xy'):
576                # does not have separate x data
577                for i in range(len(self.x)):
578                    dat += '{}'.format(self.x[i])
579                    for s in self.sub:
580                        dat += '{}{}'.format(delimiter, s.y[i])
581                    dat += newline
582            else:
583                # txyxy format, return one long xy file with subfiles
584                # separated by blank lines
585                for i in self.sub:
586                    for x1, y1 in zip(i.x, i.y):
587                        dat += '{}{}{}{}'.format(x1, delimiter, y1, newline)
588                    dat += newline
589        return dat
590
591    def write_file(self, path, delimiter='\t', newline='\n'):
592        """
593        Output x, y data to tab-separated text file.
594
595        Arguments
596        ---------
597        path: str
598            full path to output file including extension
599        delimiter: chr (default='\t')
600            delimiter character for column separation
601        newline: chr (default='\n')
602            newline character, may want to use '\r\n' for Windows based output
603
604        Example
605        -------
606        >>> spc_file = File('/path/to/ftir.spc')
607        >>> spc_file.writefile('/Users/home/output.txt', delimiter=',')
608        """
609        with open(path, 'w') as f:
610            f.write(self.data_txt(delimiter, newline))
611
612    def print_metadata(self):
613        """
614        Print out select metadata.
615        """
616        print("Scan: ", self.log_dict['Comment'], "\n",
617              float(self.log_dict['Start']), "to ",
618              float(self.log_dict['End']), "; ",
619              float(self.log_dict['Increment']), "cm-1;",
620              float(self.log_dict['Integration Time']), "s integration time")
621
622    def plot(self):
623        """
624        Plot data and return figure object.
625
626        Requires matplotlib installed
627
628        Example
629        -------
630        >>> spc_file = File('/path/to/ftir.spc')
631        >>> spc_file.plot()
632        """
633        import matplotlib.pyplot as plt
634        if self.dat_fmt.endswith('-xy'):
635            for s in self.sub:
636                plt.plot(s.x, s.y)
637        else:
638            x = self.x
639            for s in self.sub:
640                plt.plot(x, s.y)
641        plt.xlabel(self.xlabel)
642        plt.ylabel(self.ylabel)
643        return plt.gcf()
644
645    def debug_info(self):
646        """
647        Print debugging information extracted from flags and header information.
648
649        Example
650        -------
651        >>> spc_file = File('/path/to/ftir.spc')
652        >>> spc_file.debug_info()
653        """
654        print("\nDEBUG INFO\nFlags:\n")
655        # Flag bits
656        if self.tsprec:
657            print("16-bit y data")
658        if self.tcgram:
659            print("enable fexper")
660        if self.tmulti:
661            print("multiple traces")
662        if self.trandm:
663            print("arb time (z) values")
664        if self.tordrd:
665            print("ordered but uneven subtimes")
666        if self.talabs:
667            print("use fcatxt axis not fxtype")
668        if self.txyxys:
669            print("each subfile has own x's")
670        if self.txvals:
671            print("floating x-value array preceeds y's")
672
673        print('----\n')
674        # spc format version
675        if self.fversn == chr(0x4b):
676            self.pr_versn = "new LSB 1st"
677        elif self.fversn == chr(0x4c):
678            self.pr_versn = "new MSB 1st"
679        elif self.fversn == chr(0x4d):
680            self.pr_versn = "old format"
681        else:
682            self.pr_versn = "unknown version"
683
684        print("Version:", self.pr_versn)
685
686        # subfiles
687        if self.fnsub == 1:
688            print("Single file only")
689        else:
690            print("Multiple subfiles:", self.fnsub)
691
692        # multiple y values
693        if self.tmulti:
694            print("Multiple y-values")
695        else:
696            print("Single set of y-values")
697
698        # print "There are ", self.fnpts, \
699        #    " points between ", self.ffirst, \
700        #    " and ", self.flast, \
701        #    " in steps of ", self.pr_spacing

Start loading the data from a .SPC spectral file using data from the header. Store all the attributes of a spectral file:

Data

content: full raw data; sub[i]: subfile object for each subfile; sub[i].y: y data for each subfile; x: x-data, global, or for the first subheader

Examples

>>> spc_file = File('/path/to/ftir.spc')
File(filename)
 62    def __init__(self, filename):
 63        # load entire into memory temporarly
 64        with open(filename, "rb") as fin:
 65            content = fin.read()
 66            # print "Read raw data"
 67
 68        self.length = len(content)
 69        # extract first two bytes to determine file type version
 70        self.ftflg, self.fversn = struct.unpack('<cc'.encode('utf8'), content[:2])
 71        # --------------------------------------------
 72        # NEW FORMAT (LSB)
 73        # --------------------------------------------
 74        if self.fversn == b'\x4b':
 75            # format: new LSB 1st
 76            # -------------
 77            # unpack header
 78            # -------------
 79            # use little-endian format with standard sizes
 80            # use naming scheme in SPC.H header file
 81            self.ftflg, \
 82                self.fversn, \
 83                self.fexper, \
 84                self.fexp, \
 85                self.fnpts, \
 86                self.ffirst, \
 87                self.flast, \
 88                self.fnsub, \
 89                self.fxtype, \
 90                self.fytype, \
 91                self.fztype, \
 92                self.fpost, \
 93                self.fdate, \
 94                self.fres, \
 95                self.fsource, \
 96                self.fpeakpt, \
 97                self.fspare, \
 98                self.fcmnt, \
 99                self.fcatxt, \
100                self.flogoff, \
101                self.fmods, \
102                self.fprocs, \
103                self.flevel, \
104                self.fsampin, \
105                self.ffactor, \
106                self.fmethod, \
107                self.fzinc, \
108                self.fwplanes, \
109                self.fwinc, \
110                self.fwtype, \
111                self.freserv \
112                = struct.unpack(self.head_str.encode('utf8'), content[:self.head_siz])
113
114            # Flag bits
115            self.tsprec, \
116                self.tcgram, \
117                self.tmulti, \
118                self.trandm, \
119                self.tordrd, \
120                self.talabs, \
121                self.txyxys, \
122                self.txvals = flag_bits(self.ftflg)[::-1]
123
124            # fix data types if necessary
125            self.fnpts = int(self.fnpts)  # of points should be int
126            self.fexp = ord(self.fexp)
127
128            self.ffirst = float(self.ffirst)
129            self.flast = float(self.flast)
130
131            self.flogoff = int(self.flogoff)  # byte; should be int
132
133            self.fxtype = ord(self.fxtype)
134            self.fytype = ord(self.fytype)
135            self.fztype = ord(self.fztype)
136
137            self.fexper = ord(self.fexper)
138            self.fcmnt = str(self.fcmnt)
139
140            # Convert date time to appropriate format
141            d = self.fdate
142            self.year = d >> 20
143            self.month = (d >> 16) % (2**4)
144            self.day = (d >> 11) % (2**5)
145            self.hour = (d >> 6) % (2**5)
146            self.minute = d % (2**6)
147
148            # null terminated string, replace null characters with spaces
149            # split and join to remove multiple spaces
150            try:
151                self.cmnt = ' '.join((self.fcmnt.replace('\x00', ' ')).split())
152            except Exception:
153                self.cmnt = self.fcmnt
154
155            # figure out type of file
156            if self.fnsub > 1:
157                self.dat_multi = True
158
159            if self.txyxys:
160                # x values are given
161                self.dat_fmt = '-xy'
162            elif self.txvals:
163                # only one subfile, which contains the x data
164                self.dat_fmt = 'x-y'
165            else:
166                # no x values are given, but they can be generated
167                self.dat_fmt = 'gx-y'
168
169            _LOGGER.info('{}({})'.format(self.dat_fmt, self.fnsub))
170
171            sub_pos = self.head_siz
172
173            if not self.txyxys:
174                # txyxys don't have global x data
175                if self.txvals:
176                    # if global x data is given
177                    x_dat_pos = self.head_siz
178                    x_dat_end = self.head_siz + (4 * self.fnpts)
179                    self.x = np.array(
180                        [struct.unpack_from(
181                            'f', content[x_dat_pos:x_dat_end], 4 * i)[0]
182                            for i in range(0, self.fnpts)])
183                    sub_pos = x_dat_end
184                else:
185                    # otherwise generate them
186                    self.x = np.linspace(self.ffirst, self.flast, num=self.fnpts)
187
188            # make a list of subfiles
189            self.sub = []
190
191            # if subfile directory is given
192            if self.dat_fmt == '-xy' and self.fnpts > 0:
193                self.directory = True
194                # loop over entries in directory
195                for i in range(0, self.fnsub):
196                    ssfposn, ssfsize, ssftime = struct.unpack(
197                        '<iif'.encode('utf8'), content[self.fnpts + (i * 12):self.fnpts + ((i + 1) * 12)])
198                    # add sufile, load defaults for npts and exp
199                    self.sub.append(subFile(content[ssfposn:ssfposn + ssfsize], 0, 0, True, self.tsprec, self.tmulti))
200
201            else:
202                # don't have directory, for each subfile
203                for i in range(self.fnsub):
204                    # figure out its size
205                    if self.txyxys:
206                        # use points in subfile
207                        subhead_lst = read_subheader(content[sub_pos:(sub_pos + 32)])
208                        pts = subhead_lst[6]
209                        # 4 bytes each for x and y, and 32 for subheader
210                        dat_siz = (8 * pts) + 32
211                    else:
212                        # use global points
213                        pts = self.fnpts
214                        dat_siz = (4 * pts) + 32
215
216                    sub_end = sub_pos + dat_siz
217                    # read into object, add to list
218                    self.sub.append(subFile(content[sub_pos:sub_end],
219                                            self.fnpts, self.fexp, self.txyxys, self.tsprec, self.tmulti))
220                    # update positions
221                    sub_pos = sub_end
222
223            # if log data exists
224            # flog offset to log data offset not zero (bytes)
225            if self.flogoff:
226                log_head_end = self.flogoff + self.log_siz
227                self.logsizd, \
228                    self.logsizm, \
229                    self.logtxto, \
230                    self.logbins, \
231                    self.logdsks, \
232                    self.logspar \
233                    = struct.unpack(self.logstc_str.encode('utf8'),
234                                    content[self.flogoff:log_head_end])
235                log_pos = self.flogoff + self.logtxto
236
237                log_end_pos = log_pos + self.logsizd
238
239                # line endings: get rid of any '\r' and then split on '\n'
240                self.log_content = content[log_pos:log_end_pos].replace(b'\r', b'').split(b'\n')
241
242                # split log data into dictionary based on =
243                self.log_dict = dict()
244                self.log_other = []  # put the rest into a list
245                for x in self.log_content:
246                    if x.find(b'=') >= 0:
247                        # stop it from breaking if there is more than 1 =
248                        key, value = x.split(b'=')[:2]
249                        self.log_dict[key] = value
250                    else:
251                        self.log_other.append(x)
252
253            # spacing between data
254            self.spacing = (self.flast - self.ffirst) / (self.fnpts - 1)
255
256            # call functions
257            self.set_labels()
258            self.set_exp_type()
259
260        # --------------------------------------------
261        # NEW FORMAT (MSB)
262        # --------------------------------------------
263        elif self.fversn == b'\x4c':
264            # new MSB 1st
265            print("New MSB 1st, yet to be implemented")
266            pass  # To be implemented
267
268        # --------------------------------------------
269        # OLD FORMAT
270        # --------------------------------------------
271        elif self.fversn == b'\x4d':
272            # old format
273            # oxtype -> fxtype
274            # oytype -> fytype
275            self.oftflgs, \
276                self.oversn, \
277                self.oexp, \
278                self.onpts, \
279                self.ofirst, \
280                self.olast, \
281                self.fxtype, \
282                self.fytype, \
283                self.oyear, \
284                self.omonth, \
285                self.oday, \
286                self.ohour, \
287                self.ominute, \
288                self.ores, \
289                self.opeakpt, \
290                self.onscans, \
291                self.ospare, \
292                self.ocmnt, \
293                self.ocatxt, \
294                self.osubh1 = struct.unpack(self.old_head_str.encode('utf8'),
295                                            content[:self.old_head_siz])
296
297            # Flag bits (assuming same)
298            self.tsprec, \
299                self.tcgram, \
300                self.tmulti, \
301                self.trandm, \
302                self.tordrd, \
303                self.talabs, \
304                self.txyxys, \
305                self.txvals = flag_bits(self.oftflgs)[::-1]
306
307            # fix data types
308            self.oexp = int(self.oexp)
309            self.onpts = int(self.onpts)  # can't have floating num of pts
310            self.ofirst = float(self.ofirst)
311            self.olast = float(self.olast)
312
313            # Date information
314            # !! to fix !!
315            # Year collected (0=no date/time) - MSB 4 bits are Z type
316
317            # extracted as characters, using ord
318            self.omonth = ord(self.omonth)
319            self.oday = ord(self.oday)
320            self.ohour = ord(self.ohour)
321            self.ominute = ord(self.ominute)
322
323            # number of scans (? subfiles sometimes ?)
324            self.onscans = int(self.onscans)
325
326            # null terminated strings
327            self.ores = self.ores.split(b'\x00')[0]
328            self.ocmnt = self.ocmnt.split(b'\x00')[0]
329
330            # can it have separate x values ?
331            self.x = np.linspace(self.ofirst, self.olast, num=self.onpts)
332
333            # make a list of subfiles
334            self.sub = []
335
336            # already have subheader from main header, retrace steps
337            sub_pos = self.old_head_siz - self.subhead_siz
338
339            # for each subfile
340            # in the old format we don't know how many subfiles to expect,
341            # just looping till we run out
342            i = 0
343            while True:
344                try:
345                    # read in subheader
346                    subhead_lst = read_subheader(content[sub_pos:sub_pos + self.subhead_siz])
347
348                    if subhead_lst[6] > 0:
349                        # default to subfile points, unless it is zero
350                        pts = subhead_lst[6]
351                    else:
352                        pts = self.onpts
353
354                    # figure out size of subheader
355                    dat_siz = (4 * pts)
356                    sub_end = sub_pos + self.subhead_siz + dat_siz
357
358                    # read into object, add to list
359                    # send it pts since we have already figured that out
360                    self.sub.append(subFileOld(
361                        content[sub_pos:sub_end], pts, self.oexp, self.txyxys))
362                    # update next subfile postion, and index
363                    sub_pos = sub_end
364
365                    i += 1
366                except Exception:
367                    # zero indexed, set the total number of subfile
368                    self.fnsub = i + 1
369                    break
370
371            # assuming it can't have separate x values
372            self.dat_fmt = 'gx-y'
373            _LOGGER.info('{}({})'.format(self.dat_fmt, self.fnsub))
374
375            self.fxtype = ord(self.fxtype)
376            self.fytype = ord(self.fytype)
377            # need to find from year apparently
378            self.fztype = 0
379            self.set_labels()
380
381        # --------------------------------------------
382        # SHIMADZU
383        # --------------------------------------------
384        elif self.fversn == b'\xcf':
385            print("Highly experimental format, may not work ")
386            raw_data = content[10240:]  # data starts here (maybe every time)
387            # spacing between y and x data is atleast 0 bytes
388            s_32 = chr(int('0', 2)) * 32
389            s_8 = chr(int('0', 2)) * 8  # zero double
390            dat_len = raw_data.find(s_32)
391            for i in range(dat_len, len(raw_data), 8):
392                # find first non zero double
393                if raw_data[i:i + 8] != s_8:
394                    break
395            dat_siz = int(dat_len / 8)
396            self.y = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[:dat_len])
397            self.x = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[i:i + dat_len])
398
399        else:
400            print("File type %s not supported yet. Please add issue. "
401                  % hex(ord(self.fversn)))
402            self.content = content
head_str = '<cccciddicccci9s9sh32s130s30siicchf48sfifc187s'
old_head_str = '<cchfffcchcccc8shh28s130s30s32s'
logstc_str = '<iiiii44s'
head_siz = 512
old_head_siz = 256
subhead_siz = 32
log_siz = 64
subhead1_pos = 544
length
def set_labels(self):
408    def set_labels(self):
409        """
410        Set the x, y, z axis labels using various information in file content.
411        """
412
413        # --------------------------
414        # units for x,z,w axes
415        # --------------------------
416        fxtype_op = ["Arbitrary",
417                     "Wavenumber (cm-1)",
418                     "Micrometers (um)",
419                     "Nanometers (nm)",
420                     "Seconds ",
421                     "Minutes", "Hertz (Hz)",
422                     "Kilohertz (KHz)",
423                     "Megahertz (MHz) ",
424                     "Mass (M/z)",
425                     "Parts per million (PPM)",
426                     "Days",
427                     "Years",
428                     "Raman Shift (cm-1)",
429                     "eV",
430                     "XYZ text labels in fcatxt (old 0x4D version only)",
431                     "Diode Number",
432                     "Channel",
433                     "Degrees",
434                     "Temperature (F)",
435                     "Temperature (C)",
436                     "Temperature (K)",
437                     "Data Points",
438                     "Milliseconds (mSec)",
439                     "Microseconds (uSec) ",
440                     "Nanoseconds (nSec)",
441                     "Gigahertz (GHz)",
442                     "Centimeters (cm)",
443                     "Meters (m)",
444                     "Millimeters (mm)",
445                     "Hours"]
446
447        if self.fxtype < 30:
448            self.xlabel = fxtype_op[self.fxtype]
449        else:
450            self.xlabel = "Unknown"
451
452        if self.fztype < 30:
453            self.zlabel = fxtype_op[self.fztype]
454        else:
455            self.zlabel = "Unknown"
456
457        # --------------------------
458        # units y-axis
459        # --------------------------
460
461        fytype_op = ["Arbitrary Intensity",
462                     "Interferogram",
463                     "Absorbance",
464                     "Kubelka-Munk",
465                     "Counts",
466                     "Volts",
467                     "Degrees",
468                     "Milliamps",
469                     "Millimeters",
470                     "Millivolts",
471                     "Log(1/R)",
472                     "Percent",
473                     "Intensity",
474                     "Relative Intensity",
475                     "Energy",
476                     "",
477                     "Decibel",
478                     "",
479                     "",
480                     "Temperature (F)",
481                     "Temperature (C)",
482                     "Temperature (K)",
483                     "Index of Refraction [N]",
484                     "Extinction Coeff. [K]",
485                     "Real",
486                     "Imaginary",
487                     "Complex"]
488
489        fytype_op2 = ["Transmission",
490                      "Reflectance",
491                      "Arbitrary or Single Beam with Valley Peaks",
492                      "Emission"]
493
494        if self.fytype < 27:
495            self.ylabel = fytype_op[self.fytype]
496        elif self.fytype > 127 and self.fytype < 132:
497            self.ylabel = fytype_op2[self.fytype - 128]
498        else:
499            self.ylabel = "Unknown"
500
501        # --------------------------
502        # check if labels are included as text
503        # --------------------------
504
505        # split it based on 00 string
506        # format x, y, z
507        if self.talabs:
508            ll = self.fcatxt.split(b'\x00')
509            if len(ll) > 2:
510                # make sure there are enough items to extract from
511                xl, yl, zl = ll[:3]
512
513                # overwrite only if non zero
514                if len(xl) > 0:
515                    self.xlabel = xl
516                if len(yl) > 0:
517                    self.ylabel = yl
518                if len(zl) > 0:
519                    self.zlabel = zl

Set the x, y, z axis labels using various information in file content.

def set_exp_type(self):
521    def set_exp_type(self):
522        """
523        Set the experiment type.
524        """
525
526        fexper_op = ["General SPC",
527                     "Gas Chromatogram",
528                     "General Chromatogram",
529                     "HPLC Chromatogram",
530                     "FT-IR, FT-NIR, FT-Raman Spectrum or Igram",
531                     "NIR Spectrum",
532                     "UV-VIS Spectrum",
533                     "X-ray Diffraction Spectrum",
534                     "Mass Spectrum ",
535                     "NMR Spectrum or FID",
536                     "Raman Spectrum",
537                     "Fluorescence Spectrum",
538                     "Atomic Spectrum",
539                     "Chromatography Diode Array Spectra"]
540
541        self.exp_type = fexper_op[self.fexper]

Set the experiment type.

def data_txt(self, delimiter='\t', newline='\n'):
546    def data_txt(self, delimiter='\t', newline='\n'):
547        r"""
548        Returns x,y column data as a string variable, can be printed to standard output or
549        fed to text file.
550
551        Arguments
552        ---------
553        delimiter: chr (default='\t')
554            delimiter character for column separation
555        newline: chr (default='\n')
556            newline character, may want to use '\r\n' for Windows based output
557
558        Example
559        -------
560        >>> spc_file = File('/path/to/ftir.spc')
561        >>> spc_file.data_txt(newline='\r\n')
562        """
563
564        dat = ''
565        if self.fnsub == 1:
566            if self.dat_fmt.endswith('-xy'):
567                x = self.sub[0].x
568            else:
569                x = self.x
570            y = self.sub[0].y
571
572            for x1, y1 in zip(x, y):
573                dat += '{}{}{}{}'.format(x1, delimiter, y1, newline)
574        else:
575            if not self.dat_fmt.endswith('-xy'):
576                # does not have separate x data
577                for i in range(len(self.x)):
578                    dat += '{}'.format(self.x[i])
579                    for s in self.sub:
580                        dat += '{}{}'.format(delimiter, s.y[i])
581                    dat += newline
582            else:
583                # txyxy format, return one long xy file with subfiles
584                # separated by blank lines
585                for i in self.sub:
586                    for x1, y1 in zip(i.x, i.y):
587                        dat += '{}{}{}{}'.format(x1, delimiter, y1, newline)
588                    dat += newline
589        return dat

Returns x,y column data as a string variable, can be printed to standard output or fed to text file.

Arguments

delimiter: chr (default='\t') delimiter character for column separation newline: chr (default='\n') newline character, may want to use '\r\n' for Windows based output

Example

>>> spc_file = File('/path/to/ftir.spc')
>>> spc_file.data_txt(newline='\r\n')
def write_file(self, path, delimiter='\t', newline='\n'):
591    def write_file(self, path, delimiter='\t', newline='\n'):
592        """
593        Output x, y data to tab-separated text file.
594
595        Arguments
596        ---------
597        path: str
598            full path to output file including extension
599        delimiter: chr (default='\t')
600            delimiter character for column separation
601        newline: chr (default='\n')
602            newline character, may want to use '\r\n' for Windows based output
603
604        Example
605        -------
606        >>> spc_file = File('/path/to/ftir.spc')
607        >>> spc_file.writefile('/Users/home/output.txt', delimiter=',')
608        """
609        with open(path, 'w') as f:
610            f.write(self.data_txt(delimiter, newline))

Output x, y data to tab-separated text file.

    Arguments
    ---------
    path: str
        full path to output file including extension
    delimiter: chr (default='\t')
        delimiter character for column separation
    newline: chr (default='\n')
        newline character, may want to use '\r\n' for Windows based output

    Example
    -------
    >>> spc_file = File('/path/to/ftir.spc')
    >>> spc_file.write_file('/Users/home/output.txt', delimiter=',')
def print_metadata(self):
612    def print_metadata(self):
613        """
614        Print out select metadata.
615        """
616        print("Scan: ", self.log_dict['Comment'], "\n",
617              float(self.log_dict['Start']), "to ",
618              float(self.log_dict['End']), "; ",
619              float(self.log_dict['Increment']), "cm-1;",
620              float(self.log_dict['Integration Time']), "s integration time")

Print out select metadata.

def plot(self):
622    def plot(self):
623        """
624        Plot data and return figure object.
625
626        Requires matplotlib installed
627
628        Example
629        -------
630        >>> spc_file = File('/path/to/ftir.spc')
631        >>> spc_file.plot()
632        """
633        import matplotlib.pyplot as plt
634        if self.dat_fmt.endswith('-xy'):
635            for s in self.sub:
636                plt.plot(s.x, s.y)
637        else:
638            x = self.x
639            for s in self.sub:
640                plt.plot(x, s.y)
641        plt.xlabel(self.xlabel)
642        plt.ylabel(self.ylabel)
643        return plt.gcf()

Plot data and return figure object.

Requires matplotlib installed

Example

>>> spc_file = File('/path/to/ftir.spc')
>>> spc_file.plot()
def debug_info(self):
645    def debug_info(self):
646        """
647        Print debugging information extracted from flags and header information.
648
649        Example
650        -------
651        >>> spc_file = File('/path/to/ftir.spc')
652        >>> spc_file.debug_info()
653        """
654        print("\nDEBUG INFO\nFlags:\n")
655        # Flag bits
656        if self.tsprec:
657            print("16-bit y data")
658        if self.tcgram:
659            print("enable fexper")
660        if self.tmulti:
661            print("multiple traces")
662        if self.trandm:
663            print("arb time (z) values")
664        if self.tordrd:
665            print("ordered but uneven subtimes")
666        if self.talabs:
667            print("use fcatxt axis not fxtype")
668        if self.txyxys:
669            print("each subfile has own x's")
670        if self.txvals:
671            print("floating x-value array preceeds y's")
672
673        print('----\n')
674        # spc format version
675        if self.fversn == chr(0x4b):
676            self.pr_versn = "new LSB 1st"
677        elif self.fversn == chr(0x4c):
678            self.pr_versn = "new MSB 1st"
679        elif self.fversn == chr(0x4d):
680            self.pr_versn = "old format"
681        else:
682            self.pr_versn = "unknown version"
683
684        print("Version:", self.pr_versn)
685
686        # subfiles
687        if self.fnsub == 1:
688            print("Single file only")
689        else:
690            print("Multiple subfiles:", self.fnsub)
691
692        # multiple y values
693        if self.tmulti:
694            print("Multiple y-values")
695        else:
696            print("Single set of y-values")
697
698        # print "There are ", self.fnpts, \
699        #    " points between ", self.ffirst, \
700        #    " and ", self.flast, \
701        #    " in steps of ", self.pr_spacing

Print debugging information extracted from flags and header information.

Example

>>> spc_file = File('/path/to/ftir.spc')
>>> spc_file.debug_info()