Visual Servoing Platform version 3.7.0
Loading...
Searching...
No Matches
vpIoTools_npy.cpp
1/*
2 * ViSP, open source Visual Servoing Platform software.
3 * Copyright (C) 2005 - 2024 by Inria. All rights reserved.
4 *
5 * This software is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 * See the file LICENSE.txt at the root directory of this source
10 * distribution for additional information about the GNU GPL.
11 *
12 * For using ViSP with software that can not be combined with the GNU
13 * GPL, please contact Inria about acquiring a ViSP Professional
14 * Edition License.
15 *
16 * See https://visp.inria.fr for more information.
17 *
18 * This software was developed at:
19 * Inria Rennes - Bretagne Atlantique
20 * Campus Universitaire de Beaulieu
21 * 35042 Rennes Cedex
22 * France
23 *
24 * If you have questions regarding the use of this file, please contact
25 * Inria at visp@inria.fr
26 *
27 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
28 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
29 *
30 * Description:
31 * Io tools dedicated to npy.
32 */
33
34#include <visp3/core/vpConfig.h>
35#include <visp3/core/vpIoTools.h>
36
37#if defined(VISP_HAVE_MINIZ) && defined(VISP_HAVE_WORKING_REGEX)
38#define USE_ZLIB_API 0
39
40#if !USE_ZLIB_API
41// See: https://github.com/BinomialLLC/basis_universal/blob/master/encoder/basisu_miniz.h
42// Apache License, Version 2.0
43#include "basisu_miniz.h"
44
45using namespace buminiz;
46#else
47#include <zlib.h>
48#endif
49
50// To avoid warnings such as: warning: unused variable ‘littleEndian’ [-Wunused-variable]
51#define UNUSED(x) ((void)(x)) // see: https://stackoverflow.com/a/777359
52
53// Copyright (C) 2011 Carl Rogers
54// Released under MIT License
55// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
56
57
58// anonymous namespace
59namespace
60{
61#if defined(ENABLE_VISP_NAMESPACE)
62using namespace VISP_NAMESPACE_NAME; // for vpEndian calls
63#endif
64
/*!
 * Reverse, in place, the byte order of every scalar stored in \e data_holder.
 *
 * \param data_holder : Raw bytes of the array; modified in place. A null or empty holder is left untouched.
 * \param shape : Array dimensions. An empty shape denotes a 0-d array, i.e. a single element.
 * \param word_size : Size in bytes of one element.
 * \param data_type : NumPy type character. 'c' (complex) swaps the real and imaginary halves of each
 * element separately, 'U' (UTF-32 string) swaps every 4-byte code unit of the whole buffer, any other
 * value swaps each element as a single word.
 *
 * The number of processed elements is clamped to what the buffer actually holds, so an inconsistent
 * shape / word size can no longer move iterators past the end of the buffer.
 */
void reverse_data(std::shared_ptr<std::vector<char> > &data_holder, const std::vector<size_t> &shape,
                  size_t word_size, char data_type)
{
  if (!data_holder || data_holder->empty()) {
    return;
  }
  std::vector<char> &bytes = *data_holder;

  if (data_type == 'U') {
    // NumPy saves strings in UTF-32: swap each complete 4-byte code unit, ignoring a trailing partial one
    const size_t utf32_size = 4;
    for (size_t i = 0; (i + utf32_size) <= bytes.size(); i += utf32_size) {
      std::reverse(bytes.begin() + i, bytes.begin() + i + utf32_size);
    }
    return;
  }

  if (word_size == 0) {
    return; // nothing to swap
  }

  // Product of the dimensions; stays at 1 for a 0-d array (empty shape)
  size_t nb_elems = 1;
  for (size_t i = 0; i < shape.size(); i++) {
    nb_elems *= shape[i];
  }
  // Never touch more elements than the buffer contains
  nb_elems = std::min(nb_elems, bytes.size() / word_size);

  // A complex element is made of two scalars (real, imag) that are swapped independently.
  // For the other types the second range below is empty.
  const size_t first_part = (data_type == 'c') ? (word_size / 2) : word_size;
  for (size_t i = 0; i < nb_elems; i++) {
    const std::vector<char>::iterator elem = bytes.begin() + i * word_size;
    std::reverse(elem, elem + first_part);
    std::reverse(elem + first_part, elem + word_size);
  }
}
111
112uint16_t swap16bits_if(uint16_t val, bool swap)
113{
114 if (swap) {
115 return vpEndian::swap16bits(val);
116 }
117
118 return val;
119}
120
121uint32_t swap32bits_if(uint32_t val, bool swap)
122{
123 if (swap) {
124 return vpEndian::swap32bits(val);
125 }
126
127 return val;
128}
129
130// https://github.com/rogersce/cnpy/pull/78/files
131// https://github.com/nmcclatchey/Bugfix-for-cnpy/blob/e148a5ce5db80fa3e28ce6d551343cfc8ebdc832/cnpy.cpp#L285
/*!
 * Minimal scope guard around a C stream: the stream stored in \e fp, if any, is closed with fclose()
 * when the guard is destroyed.
 *
 * The guard is not copyable: a copy would hold the same FILE pointer and close it a second time.
 */
struct AutoCloser
{
  FILE *fp; //!< Guarded stream, nullptr when nothing has to be closed.

  AutoCloser() : fp(nullptr) { }

  AutoCloser(const AutoCloser &) = delete;
  AutoCloser &operator=(const AutoCloser &) = delete;

  ~AutoCloser()
  {
    if (fp != nullptr) {
      fclose(fp);
    }
  }
};
143} // anonymous namespace
144
146{
147 int x = 1;
148 return (((reinterpret_cast<char *>(&x))[0]) ? '<' : '>');
149}
150
151char visp::cnpy::map_type(const std::type_info &t)
152{
153 if (t == typeid(float)) { return 'f'; }
154 if (t == typeid(double)) { return 'f'; }
155 if (t == typeid(long double)) { return 'f'; }
156
157 if (t == typeid(int)) { return 'i'; }
158 if (t == typeid(char)) { return 'i'; }
159 if (t == typeid(short)) { return 'i'; }
160 if (t == typeid(long)) { return 'i'; }
161 if (t == typeid(long long)) { return 'i'; }
162
163 if (t == typeid(unsigned char)) { return 'u'; }
164 if (t == typeid(unsigned short)) { return 'u'; }
165 if (t == typeid(unsigned long)) { return 'u'; }
166 if (t == typeid(unsigned long long)) { return 'u'; }
167 if (t == typeid(unsigned int)) { return 'u'; }
168
169 if (t == typeid(bool)) { return 'b'; }
170
171 if (t == typeid(std::complex<float>)) { return 'c'; }
172 if (t == typeid(std::complex<double>)) { return 'c'; }
173 if (t == typeid(std::complex<long double>)) { return 'c'; }
174
175 if (t == typeid(std::string)) { return 'U'; }
176
177 else { return '?'; }
178}
179
180void visp::cnpy::parse_npy_header(unsigned char *buffer, size_t &word_size, std::vector<size_t> &shape,
181 bool &fortran_order, bool &little_endian, char &data_type)
182{
183 uint16_t header_len = *reinterpret_cast<uint16_t *>(buffer+8);
184 std::string header(reinterpret_cast<char *>(buffer+9), header_len);
185
186 //fortran order
187 size_t loc1 = header.find("fortran_order")+16;
188 fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
189
190 //shape
191 loc1 = header.find("(");
192 size_t loc2 = header.find(")");
193
194 std::regex num_regex("[0-9][0-9]*");
195 std::smatch sm;
196 shape.clear();
197
198 std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
199 while (std::regex_search(str_shape, sm, num_regex)) {
200 // https://github.com/rogersce/cnpy/commit/ca6c0ce5bed57e3b5b64aede4f39aa07a9e71f5e
201 shape.push_back(std::stoll(sm[0].str()));
202 str_shape = sm.suffix().str();
203 }
204
205 //endian, word size, data type
206 //byte order code | stands for not applicable.
207 //not sure when this applies except for byte array
208 loc1 = header.find("descr")+9;
209 little_endian = ((header[loc1] == '<') || (header[loc1] == '|') ? true : false);
210 data_type = header[loc1+1];
211
212 std::string str_ws = header.substr(loc1+2);
213 loc2 = str_ws.find("'");
214 word_size = atoll(str_ws.substr(0, loc2).c_str());
215}
216
217void visp::cnpy::parse_npy_header(FILE *fp, size_t &word_size, std::vector<size_t> &shape,
218 bool &fortran_order, bool &little_endian, char &data_type)
219{
220 char buffer[256];
221 size_t res = fread(buffer, sizeof(char), 11, fp);
222 if (res != 11) {
223 std::ostringstream oss;
224 oss << "parse_npy_header: failed fread, res=" << res;
225 throw std::runtime_error(oss.str());
226 }
227 std::string header = fgets(buffer, 256, fp);
228 assert(header[header.size()-1] == '\n');
229
230 size_t loc1, loc2;
231
232 //fortran order
233 loc1 = header.find("fortran_order");
234 if (loc1 == std::string::npos) {
235 throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
236 }
237 loc1 += 16;
238 fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
239
240 //shape
241 loc1 = header.find("(");
242 loc2 = header.find(")");
243 if ((loc1 == std::string::npos) || (loc2 == std::string::npos)) {
244 throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
245 }
246
247 std::regex num_regex("[0-9][0-9]*");
248 std::smatch sm;
249 shape.clear();
250
251 std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
252 while (std::regex_search(str_shape, sm, num_regex)) {
253 // https://github.com/rogersce/cnpy/commit/ca6c0ce5bed57e3b5b64aede4f39aa07a9e71f5e
254 shape.push_back(std::stoll(sm[0].str()));
255 str_shape = sm.suffix().str();
256 }
257
258 //endian, word size, data type
259 //byte order code | stands for not applicable.
260 //not sure when this applies except for byte array
261 loc1 = header.find("descr");
262 if (loc1 == std::string::npos) {
263 throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
264 }
265 loc1 += 9;
266 little_endian = ((header[loc1] == '<') || (header[loc1] == '|') ? true : false);
267 data_type = header[loc1+1];
268
269 std::string str_ws = header.substr(loc1+2);
270 loc2 = str_ws.find("'");
271 word_size = atoll(str_ws.substr(0, loc2).c_str());
272 if (data_type == 'U') {
273 word_size *= 4; // UTF-32 with NumPy
274 }
275}
276
277void visp::cnpy::parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
278{
279 std::vector<char> footer(22);
280 fseek(fp, -22, SEEK_END);
281 size_t res = fread(&footer[0], sizeof(char), 22, fp);
282 if (res != 22) {
283 std::ostringstream oss;
284 oss << "parse_zip_footer: failed fread, res=" << res;
285 throw std::runtime_error(oss.str());
286 }
287
288 uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
289#ifdef VISP_BIG_ENDIAN
290 disk_no = vpEndian::swap16bits(*(uint16_t *)&footer[4]);
291 disk_start = vpEndian::swap16bits(*(uint16_t *)&footer[6]);
292 nrecs_on_disk = vpEndian::swap16bits(*(uint16_t *)&footer[8]);
293 nrecs = vpEndian::swap16bits(*(uint16_t *)&footer[10]);
294 global_header_size = vpEndian::swap32bits(*(uint32_t *)&footer[12]);
295 global_header_offset = vpEndian::swap32bits(*(uint32_t *)&footer[16]);
296 comment_len = vpEndian::swap16bits(*(uint16_t *)&footer[20]);
297#else
298 disk_no = *(uint16_t *)&footer[4];
299 disk_start = *(uint16_t *)&footer[6];
300 nrecs_on_disk = *(uint16_t *)&footer[8];
301 nrecs = *(uint16_t *)&footer[10];
302 global_header_size = *(uint32_t *)&footer[12];
303 global_header_offset = *(uint32_t *)&footer[16];
304 comment_len = *(uint16_t *)&footer[20];
305#endif
306
307 UNUSED(disk_no); assert(disk_no == 0);
308 UNUSED(disk_start); assert(disk_start == 0);
309 UNUSED(nrecs_on_disk); assert(nrecs_on_disk == nrecs);
310 UNUSED(comment_len); assert(comment_len == 0);
311}
312
313visp::cnpy::NpyArray load_the_npy_file(FILE *fp)
314{
315 std::vector<size_t> shape;
316 size_t word_size;
317 bool fortran_order, little_endian;
318 char data_type = 'i'; // integer type
319 visp::cnpy::parse_npy_header(fp, word_size, shape, fortran_order, little_endian, data_type);
320
321 visp::cnpy::NpyArray arr(shape, word_size, fortran_order, data_type);
322 size_t nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
323 if (nread != arr.num_bytes()) {
324 std::ostringstream oss;
325 oss << "load_the_npy_file: failed fread, nread=" << nread << " ; num_bytes=" << arr.num_bytes();
326 throw std::runtime_error(oss.str());
327 }
328
329#ifdef VISP_LITTLE_ENDIAN
330 if (!little_endian) {
331 reverse_data(arr.data_holder, arr.shape, arr.word_size, data_type);
332 }
333#else
334 if (little_endian) {
335 reverse_data(arr.data_holder, arr.shape, arr.word_size, data_type);
336 }
337#endif
338 return arr;
339}
340
341visp::cnpy::NpyArray load_the_npz_array(FILE *fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
342{
343 std::vector<unsigned char> buffer_compr(compr_bytes);
344 std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
345 size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
346 if (nread != compr_bytes) {
347 std::ostringstream oss;
348 oss << "load_the_npz_array: failed fread, nread=" << nread << " ; compr_bytes=" << compr_bytes;
349 throw std::runtime_error(oss.str());
350 }
351
352 z_stream d_stream;
353
354 d_stream.zalloc = Z_NULL;
355 d_stream.zfree = Z_NULL;
356 d_stream.opaque = Z_NULL;
357 d_stream.avail_in = 0;
358 d_stream.next_in = Z_NULL;
359 int err = inflateInit2(&d_stream, -MAX_WBITS);
360 // https://github.com/rogersce/cnpy/commit/3ed2bc4063c455269b37af63442c595ee1bd60e1
361 if (err != Z_OK) {
362 std::ostringstream oss;
363 oss << "load_the_npz_array: zlib inflateInit2 failed ; err=" << err;
364 throw std::runtime_error(oss.str());
365 }
366
367 d_stream.avail_in = compr_bytes;
368 d_stream.next_in = &buffer_compr[0];
369 d_stream.avail_out = uncompr_bytes;
370 d_stream.next_out = &buffer_uncompr[0];
371
372 err = inflate(&d_stream, Z_FINISH);
373 if (err != Z_OK) {
374 std::ostringstream oss;
375 oss << "load_the_npz_array: zlib inflate failed ; err=" << err;
376 throw std::runtime_error(oss.str());
377 }
378 err = inflateEnd(&d_stream);
379 if (err != Z_OK) {
380 std::ostringstream oss;
381 oss << "load_the_npz_array: zlib inflateEnd failed ; err=" << err;
382 throw std::runtime_error(oss.str());
383 }
384
385 std::vector<size_t> shape;
386 size_t word_size;
387 bool fortran_order;
388 bool little_endian = true;
389 char data_type = 'i'; // integer type
390 visp::cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order, little_endian, data_type);
391
392 visp::cnpy::NpyArray array(shape, word_size, fortran_order, data_type);
393
394 size_t offset = uncompr_bytes - array.num_bytes();
395 memcpy(array.data<unsigned char>(), &buffer_uncompr[0]+offset, array.num_bytes());
396
397#ifdef VISP_LITTLE_ENDIAN
398 if (!little_endian) {
399 reverse_data(array.data_holder, array.shape, array.word_size, data_type);
400 }
401#else
402 if (little_endian) {
403 reverse_data(array.data_holder, array.shape, array.word_size, data_type);
404 }
405#endif
406
407 return array;
408}
409
421{
422 if (!vpIoTools::checkFilename(fname)) {
423 throw vpException(vpException::ioError, "This file does not exist: " + fname);
424 }
425
426 AutoCloser closer;
427 closer.fp = fopen(fname.c_str(), "rb");
428
429 if (!closer.fp) {
430 throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!");
431 }
432
433 visp::cnpy::npz_t arrays;
434 bool quit = false;
435 const unsigned int index_2 = 2;
436 const unsigned int index_3 = 3;
437 const unsigned int index_26 = 26;
438 const unsigned int index_28 = 28;
439 const unsigned int val_8 = 8;
440 const unsigned int val_18 = 18;
441 const unsigned int val_22 = 22;
442 const unsigned int val_30 = 30;
443
444 bool host_is_LE = true;
445#ifndef VISP_LITTLE_ENDIAN
446 host_is_LE = false;
447#endif
448
449 const bool header_file_is_LE = true;
450 bool same_endianness = (host_is_LE == header_file_is_LE);
451 while (!quit) {
452 std::vector<char> local_header(val_30);
453 size_t headerres = fread(&local_header[0], sizeof(char), val_30, closer.fp);
454 if (headerres != 30) {
455 throw std::runtime_error("npz_load: failed fread 1");
456 }
457
458 //if we've reached the global header, stop reading
459 if ((local_header[index_2] != 0x03) || (local_header[index_3] != 0x04)) {
460 quit = true;
461 }
462 else {
463 //read in the variable name
464 uint16_t name_len = swap16bits_if(*(uint16_t *)&local_header[index_26], !same_endianness);
465 std::string varname(name_len, ' ');
466 size_t vname_res = fread(&varname[0], sizeof(char), name_len, closer.fp);
467 if (vname_res != name_len) {
468 throw std::runtime_error("npz_load: failed fread 2");
469 }
470
471 //erase the lagging .npy
472 varname.erase(varname.end()-4, varname.end());
473
474 //read in the extra field
475 uint16_t extra_field_len = swap16bits_if(*(uint16_t *)&local_header[index_28], !same_endianness);
476 if (extra_field_len > 0) {
477 std::vector<char> buff(extra_field_len);
478 size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, closer.fp);
479 if (efield_res != extra_field_len) {
480 throw std::runtime_error("npz_load: failed fread 3");
481 }
482 }
483
484 uint16_t compr_method = swap16bits_if(*reinterpret_cast<uint16_t *>(&local_header[0] + val_8), !same_endianness);
485 uint32_t compr_bytes = swap32bits_if(*reinterpret_cast<uint32_t *>(&local_header[0] + val_18), !same_endianness);
486 uint32_t uncompr_bytes = swap32bits_if(*reinterpret_cast<uint32_t *>(&local_header[0] + val_22), !same_endianness);
487
488 if (compr_method == 0) {
489 arrays[varname] = load_the_npy_file(closer.fp);
490 }
491 else {
492 arrays[varname] = load_the_npz_array(closer.fp, compr_bytes, uncompr_bytes);
493 }
494 }
495 }
496
497 return arrays;
498}
499
511visp::cnpy::NpyArray visp::cnpy::npz_load(const std::string &fname, const std::string &varname)
512{
513 if (!vpIoTools::checkFilename(fname)) {
514 throw vpException(vpException::ioError, "This file does not exist: " + fname);
515 }
516
517 AutoCloser closer;
518 closer.fp = fopen(fname.c_str(), "rb");
519
520 if (!closer.fp) {
521 throw std::runtime_error("npz_load: Unable to open file " + fname + "!");
522 }
523
524 bool quit = false;
525 const unsigned int index_2 = 2;
526 const unsigned int index_3 = 3;
527 const unsigned int index_26 = 26;
528 const unsigned int index_28 = 28;
529 const unsigned int val_8 = 8;
530 const unsigned int val_18 = 18;
531 const unsigned int val_22 = 22;
532 const unsigned int val_30 = 30;
533
534 bool host_is_LE = true;
535#ifndef VISP_LITTLE_ENDIAN
536 host_is_LE = false;
537#endif
538
539 const bool header_file_is_LE = true;
540 bool same_endianness = (host_is_LE == header_file_is_LE);
541 while (!quit) {
542 std::vector<char> local_header(val_30);
543 size_t header_res = fread(&local_header[0], sizeof(char), val_30, closer.fp);
544 if (header_res != 30) {
545 throw std::runtime_error("npz_load 2: failed fread");
546 }
547
548 //if we've reached the global header, stop reading
549 if ((local_header[index_2] != 0x03) || (local_header[index_3] != 0x04)) {
550 quit = true;
551 }
552 else {
553 //read in the variable name
554 uint16_t name_len = swap16bits_if(*(uint16_t *)&local_header[index_26], !same_endianness);
555 std::string vname(name_len, ' ');
556 size_t vname_res = fread(&vname[0], sizeof(char), name_len, closer.fp);
557 if (vname_res != name_len) {
558 throw std::runtime_error("npz_load 2: failed fread");
559 }
560 vname.erase(vname.end()-4, vname.end()); //erase the lagging .npy
561
562 //read in the extra field
563 uint16_t extra_field_len = swap16bits_if(*(uint16_t *)&local_header[index_28], !same_endianness);
564 fseek(closer.fp, extra_field_len, SEEK_CUR); //skip past the extra field
565
566 uint16_t compr_method = swap16bits_if(*reinterpret_cast<uint16_t *>(&local_header[0] + val_8), !same_endianness);
567 uint32_t compr_bytes = swap32bits_if(*reinterpret_cast<uint32_t *>(&local_header[0] + val_18), !same_endianness);
568 uint32_t uncompr_bytes = swap32bits_if(*reinterpret_cast<uint32_t *>(&local_header[0] + val_22), !same_endianness);
569
570 if (vname == varname) {
571 NpyArray array = (compr_method == 0) ? load_the_npy_file(closer.fp) : load_the_npz_array(closer.fp, compr_bytes, uncompr_bytes);
572 return array;
573 }
574 else {
575 //skip past the data
576 uint32_t size = swap32bits_if(*(uint32_t *)&local_header[22], !same_endianness);
577 fseek(closer.fp, size, SEEK_CUR);
578 }
579 }
580 }
581
582 //if we get here, we haven't found the variable in the file
583 throw std::runtime_error("npz_load 2: Variable name " + varname + " not found in " + fname);
584}
585
595{
596 if (!vpIoTools::checkFilename(fname)) {
597 throw vpException(vpException::ioError, "This file does not exist: " + fname);
598 }
599
600 AutoCloser closer;
601 closer.fp = fopen(fname.c_str(), "rb");
602
603 if (!closer.fp) {
604 throw std::runtime_error("npy_load: Unable to open file " + fname + "!");
605 }
606
607 NpyArray arr = load_the_npy_file(closer.fp);
608
609 return arr;
610}
611
612namespace visp
613{
614namespace cnpy
615{
616// 000000e0 ff ff 12 00 14 00 4d 79 20 73 74 72 69 6e 67 20 |......My string |
617// 000000f0 64 61 74 61 2e 6e 70 79 01 00 10 00 8c 00 00 00 |data.npy........|
618// 00000100 00 00 00 00 8c 00 00 00 00 00 00 00 93 4e 55 4d |.............NUM|
619// 00000110 50 59 01 00 76 00 7b 27 64 65 73 63 72 27 3a 20 |PY..v.{'descr': |
620// 00000120 27 3c 55 33 27 2c 20 27 66 6f 72 74 72 61 6e 5f |'<U3', 'fortran_|
621// 00000130 6f 72 64 65 72 27 3a 20 46 61 6c 73 65 2c 20 27 |order': False, '|
622// 00000140 73 68 61 70 65 27 3a 20 28 29 2c 20 7d 20 20 20 |shape': (), } |
623// 00000150 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 | |
624// 00000160 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 | |
625// 00000170 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 | |
626// 00000180 20 20 20 20 20 20 20 20 20 20 20 0a 61 00 00 00 | .a...|
627// 00000190 62 00 00 00 63 00 00 00 50 4b 03 04 2d 00 00 00 |b...c...PK..-...|
628// 000001a0 00 00 00 00 21 00 76 38 d2 58 ff ff ff ff ff ff |....!.v8.X......|
629// 000001b0 ff ff 16 00 14 00 4d 79 20 76 65 63 20 73 74 72 |......My vec str|
630// 000001c0 69 6e 67 20 64 61 74 61 2e 6e 70 79 01 00 10 00 |ing data.npy....|
631// 000001d0 40 01 00 00 00 00 00 00 40 01 00 00 00 00 00 00 |@.......@.......|
632// 000001e0 93 4e 55 4d 50 59 01 00 76 00 7b 27 64 65 73 63 |.NUMPY..v.{'desc|
633// 000001f0 72 27 3a 20 27 3c 55 38 27 2c 20 27 66 6f 72 74 |r': '<U8', 'fort|
634// 00000200 72 61 6e 5f 6f 72 64 65 72 27 3a 20 46 61 6c 73 |ran_order': Fals|
635// 00000210 65 2c 20 27 73 68 61 70 65 27 3a 20 28 32 2c 20 |e, 'shape': (2, |
636// 00000220 33 29 2c 20 7d 20 20 20 20 20 20 20 20 20 20 20 |3), } |
637// 00000230 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 | |
638// 00000240 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 | |
639// 00000250 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 0a | .|
640// 00000260 61 00 00 00 62 00 00 00 63 00 00 00 64 00 00 00 |a...b...c...d...|
641// 00000270 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ...............|
642// 00000280 65 00 00 00 66 00 00 00 67 00 00 00 68 00 00 00 |e...f...g...h...|
643// 00000290 69 00 00 00 6a 00 00 00 6b 00 00 00 6c 00 00 00 |i...j...k...l...|
644// 000002a0 6d 00 00 00 6e 00 00 00 00 00 00 00 00 00 00 00 |m...n...........|
645// 000002b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
646// 000002c0 6f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |o...............|
647// 000002d0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
648// 000002e0 70 00 00 00 71 00 00 00 00 00 00 00 00 00 00 00 |p...q...........|
649// 000002f0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
650// 00000300 72 00 00 00 73 00 00 00 74 00 00 00 00 00 00 00 |r...s...t.......|
651// 00000310 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
652// 00000320 50 4b 01 02 2d 03 2d 00 00 00 00 00 00 00 21 00 |PK..-.-.......!.|
653// 00000330 4b f2 4d a7 84 00 00 00 84 00 00 00 12 00 00 00 |K.M.............|
654//
655// Create header for special string case
656std::vector<char> create_npy_header_string(const std::vector<size_t> &shape, const std::vector<size_t> &lengths)
657{
658 size_t max_length = *std::max_element(lengths.begin(), lengths.end());
659
660 std::vector<char> dict;
661 dict += "{'descr': '";
662 dict += BigEndianTest();
663 dict += map_type(typeid(std::string));
664 dict += std::to_string(max_length);
665 if (shape.size() > 0) {
666 dict += "', 'fortran_order': False, 'shape': (";
667 dict += std::to_string(shape[0]);
668 for (size_t i = 1; i < shape.size(); ++i) {
669 dict += ", ";
670 dict += std::to_string(shape[i]);
671 }
672 if (shape.size() == 1) dict += ",";
673 dict += "), }";
674 }
675 else {
676 dict += "', 'fortran_order': False, 'shape': (";
677 dict += "), }";
678 }
679 //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
680 int remainder = 16 - (10 + dict.size()) % 16;
681 dict.insert(dict.end(), remainder, ' ');
682 dict.back() = '\n';
683
684 std::vector<char> header;
685 header += static_cast<char>(0x93);
686 header += "NUMPY";
687 header += static_cast<char>(0x01); //major version of numpy format
688 header += static_cast<char>(0x00); //minor version of numpy format
689#ifdef VISP_BIG_ENDIAN
690 header += vpEndian::swap16bits(static_cast<uint16_t>(dict.size()));
691#else
692 header += static_cast<uint16_t>(dict.size());
693#endif
694 header.insert(header.end(), dict.begin(), dict.end());
695
696 return header;
697}
698
699// std::vector<char> utf8_to_utf32_vec(const std::string &utf8)
700// {
701// // SO: https://stackoverflow.com/questions/52703630/convert-c-stdstring-to-utf-16-le-encoded-string/52703954#52703954
702// // SO and ChatGPT give approximately the same logic.
703
704// // But since I don't know how C++ deals with utf-16/utf-32 and std::string, and for simplicity, we do something stupid
705// // instead (see the utf8_to_utf32_vec_pad() function below).
706
707// std::wstring_convert<std::codecvt_utf8<char32_t, 1114111UL, std::codecvt_mode::little_endian>, char32_t> cnv;
708// std::u32string s = cnv.from_bytes(utf8);
709// if (cnv.converted() < utf8.size()) {
710// throw std::runtime_error("incomplete conversion");
711// }
712
713// std::vector<char> utf32Vector;
714// for (char32_t uc : s) {
715// utf32Vector.push_back(static_cast<char>(uc & 0xFF));
716// utf32Vector.push_back(static_cast<char>((uc >> 8) & 0xFF));
717// utf32Vector.push_back(static_cast<char>((uc >> 16) & 0xFF));
718// utf32Vector.push_back(static_cast<char>((uc >> 24) & 0xFF));
719// }
720
721// return utf32Vector;
722// }
723
724// NumPy saves a string character using 32-bits:
725// - https://github.com/numpy/numpy/issues/15347
726// Here for simplicity we assume std::string is utf-8 and we simply pad the vector with 0-val (NULL character).
/*!
 * Expand a string to fixed-size 4-byte characters, the way NumPy stores 'U' data.
 *
 * Each byte of \e utf8 is copied as is in the least significant byte of a 4-byte character (first byte
 * when VISP_BIG_ENDIAN is not defined, last byte otherwise); the three other bytes are set to 0. No real
 * UTF-8 decoding is done, so only single-byte characters are converted faithfully.
 *
 * \param utf8 : Input string.
 * \param max_size : Number of characters of the output. Shorter strings are padded with null
 * characters; the characters of \e utf8 beyond \e max_size are dropped instead of being written past
 * the end of the output.
 * \return A vector of exactly 4 * max_size bytes.
 */
std::vector<char> utf8_to_utf32_vec_pad(const std::string &utf8, const std::size_t &max_size)
{
  const std::size_t utf32_size = 4;
  std::vector<char> utf32Vector(utf32_size * max_size, 0);

#ifdef VISP_BIG_ENDIAN
  const std::size_t lsb_pos = utf32_size - 1;
#else
  const std::size_t lsb_pos = 0;
#endif

  const std::size_t nb_chars = std::min(utf8.length(), max_size);
  for (std::size_t i = 0; i < nb_chars; i++) {
    utf32Vector[i * utf32_size + lsb_pos] = utf8[i];
  }

  return utf32Vector;
}
742} // cnpy
743} // visp
744
758void visp::cnpy::npz_save(const std::string &zipname, std::string fname, const std::vector<std::string> &data_vec, const std::vector<size_t> &shape, const std::string &mode)
759{
760 if (data_vec.empty()) {
761 vpException(vpException::badValue, "Input string data is empty.");
762 }
763 if (shape.empty()) {
764 vpException(vpException::dimensionError, "Input string data shape vec is empty.");
765 }
766
767 //first, append a .npy to the fname
768 fname += ".npy";
769
770 //now, on with the show
771 FILE *fp = NULL;
772 uint16_t nrecs = 0;
773 size_t global_header_offset = 0;
774 std::vector<char> global_header;
775
776 if (mode == "a") fp = fopen(zipname.c_str(), "r+b");
777
778 if (fp) {
779 //zip file exists. we need to add a new npy file to it.
780 //first read the footer. this gives us the offset and size of the global header
781 //then read and store the global header.
782 //below, we will write the the new data at the start of the global header then append the global header and footer below it
783 size_t global_header_size;
784 parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
785 fseek(fp, static_cast<long>(global_header_offset), SEEK_SET);
786 global_header.resize(global_header_size);
787 size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
788 if (res != global_header_size) {
789 throw std::runtime_error("npz_save: header read error while adding to existing zip");
790 }
791 fseek(fp, static_cast<long>(global_header_offset), SEEK_SET);
792 }
793 else {
794 fp = fopen(zipname.c_str(), "wb");
795 }
796
797 std::vector<size_t> lengths;
798 lengths.reserve(data_vec.size());
799 for (auto data_str : data_vec) {
800 lengths.push_back(data_str.length());
801 }
802
803 std::vector<char> npy_header = create_npy_header_string(shape, lengths);
804 size_t max_length = *std::max_element(lengths.begin(), lengths.end());
805
806 std::vector<char> data_str_utf32_LE;
807 data_str_utf32_LE.reserve(max_length*4); // NumPy stores string as UTF-32: https://github.com/numpy/numpy/issues/15347
808 for (size_t i = 0; i < lengths.size(); i++) {
809 std::vector<char> substr_utf32 = utf8_to_utf32_vec_pad(data_vec[i], max_length);
810 data_str_utf32_LE.insert(data_str_utf32_LE.end(), substr_utf32.begin(), substr_utf32.end());
811 }
812
813 // https://github.com/rogersce/cnpy/pull/58/files
814 size_t nels = data_str_utf32_LE.size();
815 size_t nbytes = nels*sizeof(char) + npy_header.size();
816
817 //get the CRC of the data to be added
818 uint32_t crc = vp_mz_crc32(0L, (uint8_t *)&npy_header[0], npy_header.size());
819 if (nels > 0) {
820 crc = vp_mz_crc32(crc, (uint8_t *)&data_str_utf32_LE[0], nels*sizeof(uint8_t));
821 }
822
823 //build the local header
824 std::vector<char> local_header;
825 local_header += "PK"; //first part of sig
826#ifdef VISP_BIG_ENDIAN
827 local_header += vpEndian::swap16bits(static_cast<uint16_t>(0x0403)); //second part of sig
828 local_header += vpEndian::swap16bits(static_cast<uint16_t>(20)); //min version to extract
829 local_header += vpEndian::swap16bits(static_cast<uint16_t>(0)); //general purpose bit flag
830 local_header += vpEndian::swap16bits(static_cast<uint16_t>(0)); //compression method
831 local_header += vpEndian::swap16bits(static_cast<uint16_t>(0)); //file last mod time
832 local_header += vpEndian::swap16bits(static_cast<uint16_t>(0)); //file last mod date
833 local_header += vpEndian::swap32bits(static_cast<uint32_t>(crc)); //crc
834 local_header += vpEndian::swap32bits(static_cast<uint32_t>(nbytes)); //compressed size
835 local_header += vpEndian::swap32bits(static_cast<uint32_t>(nbytes)); //uncompressed size
836 local_header += vpEndian::swap16bits(static_cast<uint16_t>(fname.size())); //fname length
837 local_header += vpEndian::swap16bits(static_cast<uint16_t>(0)); //extra field length
838#else
839 local_header += static_cast<uint16_t>(0x0403); //second part of sig
840 local_header += static_cast<uint16_t>(20); //min version to extract
841 local_header += static_cast<uint16_t>(0); //general purpose bit flag
842 local_header += static_cast<uint16_t>(0); //compression method
843 local_header += static_cast<uint16_t>(0); //file last mod time
844 local_header += static_cast<uint16_t>(0); //file last mod date
845 local_header += static_cast<uint32_t>(crc); //crc
846 local_header += static_cast<uint32_t>(nbytes); //compressed size
847 local_header += static_cast<uint32_t>(nbytes); //uncompressed size
848 local_header += static_cast<uint16_t>(fname.size()); //fname length
849 local_header += static_cast<uint16_t>(0); //extra field length
850#endif
851 local_header += fname;
852
853 //build global header
854 global_header += "PK"; //first part of sig
855#ifdef VISP_BIG_ENDIAN
856 global_header += vpEndian::swap16bits(static_cast<uint16_t>(0x0201)); //second part of sig
857 global_header += vpEndian::swap16bits(static_cast<uint16_t>(20)); //version made by
858 global_header.insert(global_header.end(), local_header.begin()+4, local_header.begin()+30);
859 global_header += static_cast<uint16_t>(0); //file comment length
860 global_header += static_cast<uint16_t>(0); //disk number where file starts
861 global_header += static_cast<uint16_t>(0); //internal file attributes
862 global_header += static_cast<uint32_t>(0); //external file attributes
863 global_header += vpEndian::swap32bits(static_cast<uint32_t>(global_header_offset)); //relative offset of local file header, since it begins where the global header used to begin
864#else
865 global_header += static_cast<uint16_t>(0x0201); //second part of sig
866 global_header += static_cast<uint16_t>(20); //version made by
867 global_header.insert(global_header.end(), local_header.begin()+4, local_header.begin()+30);
868 global_header += static_cast<uint16_t>(0); //file comment length
869 global_header += static_cast<uint16_t>(0); //disk number where file starts
870 global_header += static_cast<uint16_t>(0); //internal file attributes
871 global_header += static_cast<uint32_t>(0); //external file attributes
872 global_header += static_cast<uint32_t>(global_header_offset); //relative offset of local file header, since it begins where the global header used to begin
873#endif
874 global_header += fname;
875
876 //build footer
877 std::vector<char> footer;
878 footer += "PK"; //first part of sig
879#ifdef VISP_BIG_ENDIAN
880 footer += vpEndian::swap16bits(static_cast<uint16_t>(0x0605)); //second part of sig
881 footer += static_cast<uint16_t>(0); //number of this disk
882 footer += static_cast<uint16_t>(0); //disk where footer starts
883 footer += vpEndian::swap16bits(static_cast<uint16_t>(nrecs+1)); //number of records on this disk
884 footer += vpEndian::swap16bits(static_cast<uint16_t>(nrecs+1)); //total number of records
885 footer += vpEndian::swap32bits(static_cast<uint32_t>(global_header.size())); //nbytes of global headers
886 footer += vpEndian::swap32bits(static_cast<uint32_t>(global_header_offset + nbytes + local_header.size())); //offset of start of global headers, since global header now starts after newly written array
887#else
888 footer += static_cast<uint16_t>(0x0605); //second part of sig
889 footer += static_cast<uint16_t>(0); //number of this disk
890 footer += static_cast<uint16_t>(0); //disk where footer starts
891 footer += static_cast<uint16_t>(nrecs+1); //number of records on this disk
892 footer += static_cast<uint16_t>(nrecs+1); //total number of records
893 footer += static_cast<uint32_t>(global_header.size()); //nbytes of global headers
894 footer += static_cast<uint32_t>(global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
895#endif
896 footer += static_cast<uint16_t>(0); //zip file comment length
897
898 //write everything
899 fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
900 fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
901 fwrite(&data_str_utf32_LE[0], sizeof(char), nels, fp);
902 fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
903 fwrite(&footer[0], sizeof(char), footer.size(), fp);
904 fclose(fp);
905}
906
919void visp::cnpy::npz_save(const std::string &zipname, const std::string &fname, const std::string &data_str, const std::string &mode)
920{
921 std::vector<std::string> data_vec;
922 data_vec.push_back(data_str);
923 std::vector<size_t> shape { 1 };
924 npz_save(zipname, fname, data_vec, shape, mode);
925}
926
927#endif
vpException: error that can be emitted by ViSP classes.
Definition vpException.h:60
@ ioError
I/O error.
Definition vpException.h:67
@ badValue
Used to indicate that a value is not in the allowed range.
Definition vpException.h:73
@ dimensionError
Bad dimension.
Definition vpException.h:71
static bool checkFilename(const std::string &filename)
Helpers to convert NPY/NPZ format to/from ViSP format.
Definition vpIoTools.h:94
VISP_EXPORT npz_t npz_load(const std::string &fname)
VISP_EXPORT char map_type(const std::type_info &t)
std::map< std::string, NpyArray > npz_t
Definition vpIoTools.h:177
VISP_EXPORT NpyArray npy_load(const std::string &fname)
std::vector< char > utf8_to_utf32_vec_pad(const std::string &utf8, const std::size_t &max_size)
VISP_EXPORT void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
VISP_EXPORT void npz_save(const std::string &zipname, std::string fname, const std::vector< std::string > &data_vec, const std::vector< size_t > &shape, const std::string &mode="w")
std::vector< char > create_npy_header_string(const std::vector< size_t > &shape, const std::vector< size_t > &lengths)
VISP_EXPORT char BigEndianTest()
VISP_EXPORT void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order, bool &little_endian, char &data_type)
VISP_EXPORT uint32_t swap32bits(uint32_t val)
Definition vpEndian.cpp:60
VISP_EXPORT uint16_t swap16bits(uint16_t val)
Definition vpEndian.cpp:48