Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Example.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,42 @@ LargeData -r 300000 -c 15 -S 60000 --use-style
- Reorder sheets (move left/right)
- Save or Save As to produce a modified workbook

<br />

---

## [sax reader](https://github.com/QtExcel/QXlsx/blob/j2doll/sax_reader/TestExcel/extractdata_sax.cpp)

```cpp
// Streams every sheet of `doc` through the SAX reader and logs each cell as
// "<sheet>!R<row>C<col> = <value>", followed by a per-sheet status line.
void dump_all_sheets_sax(QXlsx::Document& doc)
{
    QXlsx::sax_options opt;
    // If there are many strings, set to false to save more RAM
    // (but the sharedString index may be output instead).
    opt.resolve_shared_strings = true;
    // Output formula as string instead of result value (set false if not needed).
    opt.read_formulas_as_text = true;
    // Continue even if sheetData is empty.
    opt.stop_on_empty_sheetdata = false;

    const QStringList sheets = doc.sheetNames();
    qInfo() << "sheet count:" << sheets.size();

    for (const QString& sheet_name : sheets) {
        qInfo().noquote() << "\n=== SHEET:" << sheet_name << "===";

        const bool ok = doc.read_sheet_sax(
            sheet_name,
            opt,
            [&](const QXlsx::sax_cell& cell) -> bool {
                // Use the multi-argument arg() overload: all placeholders are
                // replaced in a single pass, so a sheet name that itself
                // contains "%2", "%3" or "%4" is not expanded a second time
                // (which chained .arg().arg() calls would do).
                qDebug().noquote()
                    << QString("%1!R%2C%3 = %4")
                           .arg(cell.sheet_name,
                                QString::number(cell.row),
                                QString::number(cell.col),
                                cell.value.toString());
                return true; // continue
            });

        qInfo() << "sheet done:" << sheet_name << "ok=" << ok;
    }
}
```


2 changes: 1 addition & 1 deletion HowToSetProject-cmake.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Using cmake

> *Read this in other languages: [English](HowToSetProject-cmake.md), :kr: [한국어](HowToSetProject-cmake.ko.md), :zh-CN: [简体中文](HowToSetProject-cmake.zh-CN.md)*
> *Read this in other languages: [English](HowToSetProject-cmake.md), :cn: [简体中文](HowToSetProject-cmake.zh-CN.md)*

### To install QXlsx

Expand Down
2 changes: 2 additions & 0 deletions QXlsx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ set(SRC_FILES
source/xlsxdocument.cpp
source/xlsxrelationships.cpp
source/xlsxutility.cpp
source/xlsxreadsax.cpp
header/xlsxabstractooxmlfile_p.h
header/xlsxchartsheet_p.h
header/xlsxdocpropsapp_p.h
Expand Down Expand Up @@ -112,6 +113,7 @@ set(SRC_FILES
header/xlsxdrawing_p.h
header/xlsxrichstring_p.h
header/xlsxutility_p.h
header/xlsxreadsax.h
)

set(QXLSX_PUBLIC_HEADERS
Expand Down
7 changes: 4 additions & 3 deletions QXlsx/QXlsx.pri
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ $${QXLSX_HEADERPATH}xlsxworkbook_p.h \
$${QXLSX_HEADERPATH}xlsxworksheet.h \
$${QXLSX_HEADERPATH}xlsxworksheet_p.h \
$${QXLSX_HEADERPATH}xlsxzipreader_p.h \
$${QXLSX_HEADERPATH}xlsxzipwriter_p.h
$${QXLSX_HEADERPATH}xlsxzipwriter_p.h \
$${QXLSX_HEADERPATH}xlsxreadsax.h

SOURCES += \
$${QXLSX_SOURCEPATH}xlsxabstractooxmlfile.cpp \
Expand Down Expand Up @@ -135,8 +136,8 @@ $${QXLSX_SOURCEPATH}xlsxutility.cpp \
$${QXLSX_SOURCEPATH}xlsxworkbook.cpp \
$${QXLSX_SOURCEPATH}xlsxworksheet.cpp \
$${QXLSX_SOURCEPATH}xlsxzipreader.cpp \
$${QXLSX_SOURCEPATH}xlsxzipwriter.cpp

$${QXLSX_SOURCEPATH}xlsxzipwriter.cpp \
$${QXLSX_SOURCEPATH}xlsxreadsax.cpp

########################################
# custom setting for compiler & system
Expand Down
6 changes: 6 additions & 0 deletions QXlsx/QXlsx.pro
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,10 @@ QXLSX_HEADERPATH=$$PWD/header/
QXLSX_SOURCEPATH=$$PWD/source/
include($$PWD/QXlsx.pri)

HEADERS += \
header/xlsxreadsax.h

SOURCES += \
source/xlsxreadsax.cpp


12 changes: 12 additions & 0 deletions QXlsx/header/xlsxdocument.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "xlsxformat.h"
#include "xlsxglobal.h"
#include "xlsxworksheet.h"
#include "xlsxreadsax.h"

#include <QIODevice>
#include <QImage>
Expand Down Expand Up @@ -132,6 +133,17 @@ class QXLSX_EXPORT Document : public QObject
bool autosizeColumnWidth(int colFirst, int colLast);
bool autosizeColumnWidth();


/////////////////////////////////
// SAX-style streaming read: delivers the cells of one sheet to a callback.
//
//   sheet_name  : name of the sheet; looked up in the workbook's sheet-name
//                 list and forwarded to the index overload.
//   sheet_index : position of the sheet as accepted by Workbook::sheet()
//                 (the name overload passes the QStringList::indexOf() result).
//   opt         : reader options (see sax_options).
//   on_cell     : callback that receives each cell.
//
// Returns false when the sheet cannot be found, the xlsx package cannot be
// reopened, or the sheet XML is empty; otherwise returns the result of
// read_sheet_xml_sax().
bool read_sheet_sax(const QString& sheet_name,
const sax_options& opt,
const sax_cell_callback& on_cell);

bool read_sheet_sax(int sheet_index,
const sax_options& opt,
const sax_cell_callback& on_cell);

private:
QMap<int, int> getMaximalColumnWidth(int firstRow = 1, int lastRow = INT_MAX);

Expand Down
5 changes: 5 additions & 0 deletions QXlsx/header/xlsxdocument_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include <QMap>

#include <memory>

QT_BEGIN_NAMESPACE_XLSX

class DocumentPrivate
Expand All @@ -35,6 +37,9 @@ class DocumentPrivate
std::shared_ptr<Workbook> workbook;
std::shared_ptr<ContentTypes> contentTypes;
bool isLoad;

// Holds the raw bytes of the whole xlsx (zip) package so that the archive can be reopened for SAX reading even when the document was loaded from a QIODevice instead of a file path
std::shared_ptr<QByteArray> package_bytes;
};

QT_END_NAMESPACE_XLSX
Expand Down
40 changes: 40 additions & 0 deletions QXlsx/header/xlsxreadsax.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef XLSXREADSAX_H
#define XLSXREADSAX_H

#include <QXmlStreamReader>
#include <QString>
#include <QVariant>
#include <functional>

namespace QXlsx {

// Options that control how a sheet is read by the SAX reader.
struct sax_options
{
    // When true, Document::read_sheet_sax() loads the shared-string table and
    // hands it to the sheet parser; when false, no table is passed.
    bool resolve_shared_strings{true};

    // NOTE(review): presumably reports a formula cell as its formula text
    // rather than its cached result - confirm in read_sheet_xml_sax().
    bool read_formulas_as_text{false};

    // NOTE(review): presumably aborts the read when <sheetData> is empty -
    // confirm in read_sheet_xml_sax().
    bool stop_on_empty_sheetdata{false};
};

// One cell as delivered to the SAX cell callback.
struct sax_cell
{
    // Name of the sheet the cell belongs to.
    QString sheet_name;

    // Row number, 1-based (0 until filled in).
    int row{0};

    // Column number, 1-based (0 until filled in).
    int col{0};

    // Cell content: bool, double, QString, etc.
    QVariant value;
};

// Callback type that receives each cell produced by the SAX reader.
// Return true to continue reading.
// NOTE(review): a false return presumably stops the read early - confirm
// against read_sheet_xml_sax().
using sax_cell_callback = std::function<bool(const sax_cell&)>;

// Forward declaration only; this header uses ZipReader by reference.
class ZipReader;
// Loads all of sharedStrings.xml from the package opened by `zip` and returns
// the strings as a list (simple implementation). Loading is optional:
// Document::read_sheet_sax() calls this only when
// sax_options::resolve_shared_strings is true.
QStringList load_shared_strings_all(ZipReader& zip);

// Parse sheet.xml with SAX.
//   sheet_xml      : raw bytes of the sheet XML.
//   opt            : reader options (see sax_options).
//   shared_strings : shared-string table, or nullptr when shared strings are
//                    not to be resolved.
//   on_cell        : callback that receives each parsed cell.
// Returns a bool status; Document::read_sheet_sax() forwards it unchanged.
bool read_sheet_xml_sax(const QByteArray& sheet_xml,
const sax_options& opt,
const QStringList* shared_strings, // may be nullptr
const sax_cell_callback& on_cell);

} // namespace QXlsx

#endif // XLSXREADSAX_H
65 changes: 65 additions & 0 deletions QXlsx/source/xlsxdocument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include "xlsxzipreader_p.h"
#include "xlsxzipwriter_p.h"

#include "xlsxreadsax.h"

#include <QBuffer>
#include <QDebug>
#include <QDir>
Expand Down Expand Up @@ -1557,4 +1559,67 @@ bool Document::autosizeColumnWidth()
return erg;
}

/////////////////////////////////////////////////////////////////////
// ======================= SAX streaming API =========================
bool Document::read_sheet_sax(int sheet_index,
const sax_options& opt,
const sax_cell_callback& on_cell)
{
if (!d_ptr || !d_ptr->workbook)
return false;

// Open zip (supports both file path and QIODevice based)
std::unique_ptr<QIODevice> owned_device;

if (!d_ptr->packageName.isEmpty()) {
std::unique_ptr<QFile> f(new QFile(d_ptr->packageName));
if (!f->open(QIODevice::ReadOnly))
return false;
owned_device = std::move(f);
} else if (d_ptr->package_bytes && !d_ptr->package_bytes->isEmpty()) {
std::unique_ptr<QBuffer> b(new QBuffer(d_ptr->package_bytes.get()));
if (!b->open(QIODevice::ReadOnly))
return false;
owned_device = std::move(b);
} else {
return false;
}

ZipReader zip(owned_device.get());

// shared strings (optional)
QStringList shared_strings;
if (opt.resolve_shared_strings) {
shared_strings = QXlsx::load_shared_strings_all(zip);
}

// sheet XML path: workbook already has filePath (actual path determined by relationship (rels))
AbstractSheet *abs_sheet = d_ptr->workbook->sheet(sheet_index);
if (!abs_sheet)
return false;

const QString sheet_path = abs_sheet->filePath();
const QByteArray sheet_xml = zip.fileData(sheet_path);

if (sheet_xml.isEmpty())
return false;

return QXlsx::read_sheet_xml_sax(sheet_xml, opt,
opt.resolve_shared_strings ? &shared_strings : nullptr,
on_cell);
}

bool Document::read_sheet_sax(const QString& sheet_name,
const sax_options& opt,
const sax_cell_callback& on_cell)
{
const QStringList names = d_ptr->workbook->worksheetNames();
const int idx = names.indexOf(sheet_name);
if (idx < 0)
return false;
return read_sheet_sax(idx, opt, on_cell);
}
//////////////////////////////////////////////////////////////////////


QT_END_NAMESPACE_XLSX
Loading