Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Example.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,42 @@ LargeData -r 300000 -c 15 -S 60000 --use-style
- Reorder sheets (move left/right)
- Save or Save As to produce a modified workbook

<br />

---

## [SAX reader](https://github.com/QtExcel/QXlsx/blob/j2doll/sax_reader/TestExcel/extractdata_sax.cpp)

```cpp
// Streams every sheet of `doc` through the SAX reader and logs one line per cell.
void dump_all_sheets_sax(QXlsx::Document& doc)
{
    // Reader configuration used for all sheets.
    QXlsx::sax_options options;
    // If there are many strings, set to false to save more RAM
    // (but the sharedString index may be output instead).
    options.resolve_shared_strings = true;
    // Output the formula as a string instead of the result value
    // (set to false if not needed).
    options.read_formulas_as_text = true;
    // Continue even if sheetData is empty.
    options.stop_on_empty_sheetdata = false;

    const QStringList names = doc.sheetNames();
    qInfo() << "sheet count:" << names.size();

    // Per-cell handler: prints "<sheet>!R<row>C<col> = <value>" and asks the
    // reader to keep going.
    const auto print_cell = [&](const QXlsx::sax_cell& cell) -> bool {
        const QString line = QString("%1!R%2C%3 = %4")
                                 .arg(cell.sheet_name)
                                 .arg(cell.row)
                                 .arg(cell.col)
                                 .arg(cell.value.toString());
        qDebug().noquote() << line;
        return true; // continue
    };

    for (const QString& sheet_name : names) {
        qInfo().noquote() << "\n=== SHEET:" << sheet_name << "===";

        const bool ok = doc.read_sheet_sax(sheet_name, options, print_cell);

        qInfo() << "sheet done:" << sheet_name << "ok=" << ok;
    }
}
```


2 changes: 1 addition & 1 deletion HowToSetProject-cmake.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Using cmake

> *Read this in other languages: [English](HowToSetProject-cmake.md), :kr: [한국어](HowToSetProject-cmake.ko.md), :zh-CN: [简体中文](HowToSetProject-cmake.zh-CN.md)*
> *Read this in other languages: [English](HowToSetProject-cmake.md), :cn: [简体中文](HowToSetProject-cmake.zh-CN.md)*

### To install QXlsx

Expand Down
2 changes: 2 additions & 0 deletions QXlsx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ set(SRC_FILES
source/xlsxdocument.cpp
source/xlsxrelationships.cpp
source/xlsxutility.cpp
source/xlsxreadsax.cpp
header/xlsxabstractooxmlfile_p.h
header/xlsxchartsheet_p.h
header/xlsxdocpropsapp_p.h
Expand Down Expand Up @@ -112,6 +113,7 @@ set(SRC_FILES
header/xlsxdrawing_p.h
header/xlsxrichstring_p.h
header/xlsxutility_p.h
header/xlsxreadsax.h
)

set(QXLSX_PUBLIC_HEADERS
Expand Down
7 changes: 4 additions & 3 deletions QXlsx/QXlsx.pri
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ $${QXLSX_HEADERPATH}xlsxworkbook_p.h \
$${QXLSX_HEADERPATH}xlsxworksheet.h \
$${QXLSX_HEADERPATH}xlsxworksheet_p.h \
$${QXLSX_HEADERPATH}xlsxzipreader_p.h \
$${QXLSX_HEADERPATH}xlsxzipwriter_p.h
$${QXLSX_HEADERPATH}xlsxzipwriter_p.h \
$${QXLSX_HEADERPATH}xlsxreadsax.h

SOURCES += \
$${QXLSX_SOURCEPATH}xlsxabstractooxmlfile.cpp \
Expand Down Expand Up @@ -135,8 +136,8 @@ $${QXLSX_SOURCEPATH}xlsxutility.cpp \
$${QXLSX_SOURCEPATH}xlsxworkbook.cpp \
$${QXLSX_SOURCEPATH}xlsxworksheet.cpp \
$${QXLSX_SOURCEPATH}xlsxzipreader.cpp \
$${QXLSX_SOURCEPATH}xlsxzipwriter.cpp

$${QXLSX_SOURCEPATH}xlsxzipwriter.cpp \
$${QXLSX_SOURCEPATH}xlsxreadsax.cpp

########################################
# custom setting for compiler & system
Expand Down
6 changes: 6 additions & 0 deletions QXlsx/QXlsx.pro
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,10 @@ QXLSX_HEADERPATH=$$PWD/header/
QXLSX_SOURCEPATH=$$PWD/source/
include($$PWD/QXlsx.pri)

HEADERS += \
header/xlsxreadsax.h

SOURCES += \
source/xlsxreadsax.cpp


12 changes: 12 additions & 0 deletions QXlsx/header/xlsxdocument.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "xlsxformat.h"
#include "xlsxglobal.h"
#include "xlsxworksheet.h"
#include "xlsxreadsax.h"

#include <QIODevice>
#include <QImage>
Expand Down Expand Up @@ -132,6 +133,17 @@ class QXLSX_EXPORT Document : public QObject
bool autosizeColumnWidth(int colFirst, int colLast);
bool autosizeColumnWidth();


/////////////////////////////////
// New feature: Stream sheet cells with callback "without saving"
/// @brief Streams the cells of the sheet named @p sheet_name to @p on_cell.
///
/// The name is looked up in the workbook's worksheet names and the call is
/// forwarded to the index-based overload.
///
/// @param sheet_name Name of the sheet to read.
/// @param opt        Options forwarded to the SAX reader.
/// @param on_cell    Callback that receives each cell.
/// @return false if @p sheet_name is not among the workbook's worksheet
///         names; otherwise the result of the index-based overload.
bool read_sheet_sax(const QString& sheet_name,
const sax_options& opt,
const sax_cell_callback& on_cell);

Comment on lines +138 to +142
Copy link

Copilot AI Dec 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The public API functions read_sheet_sax are not documented. Public API methods should include documentation comments explaining parameters, return values, and behavior.

Suggested change
// New feature: Stream sheet cells with callback "without saving"
bool read_sheet_sax(const QString& sheet_name,
const sax_options& opt,
const sax_cell_callback& on_cell);
// New feature: Stream sheet cells with callback "without saving"
/**
* @brief Reads cells from a worksheet using a SAX-style parser.
*
* These overloads stream cell data from a worksheet without modifying or
* saving the document. For each parsed cell, the provided callback
* (@p on_cell) is invoked with information about that cell. This is
* useful for processing large worksheets without loading the entire
* sheet into memory.
*
* The @p opt parameter controls parsing options such as which parts of
* the worksheet to include or skip during streaming.
*
* @param sheet_name Name of the worksheet to read.
* @param opt Options that configure the SAX reading behavior.
* @param on_cell Callback invoked for each cell encountered while
* streaming the worksheet.
*
* @return @c true if the worksheet was found and read successfully;
* otherwise @c false (for example, if the sheet does not exist
* or an error occurs while reading).
*/
bool read_sheet_sax(const QString& sheet_name,
const sax_options& opt,
const sax_cell_callback& on_cell);
/**
* @brief Reads cells from a worksheet, selected by index, using a
* SAX-style parser.
*
* This overload behaves like the name-based version but selects the
* worksheet by its zero-based index in the workbook.
*
* @param sheet_index Zero-based index of the worksheet to read.
* @param opt Options that configure the SAX reading behavior.
* @param on_cell Callback invoked for each cell encountered while
* streaming the worksheet.
*
* @return @c true if the worksheet was found and read successfully;
* otherwise @c false.
*/

Copilot uses AI. Check for mistakes.
/// @brief Streams the cells of the sheet at @p sheet_index to @p on_cell.
///
/// The sheet XML is read from the package file, or from the retained package
/// bytes when no file name is known, and handed to the SAX parser.
///
/// @param sheet_index Zero-based position of the sheet in the workbook.
/// @param opt         Reader options; shared strings are loaded only when
///                    opt.resolve_shared_strings is set.
/// @param on_cell     Callback that receives each cell.
/// @return false if there is no workbook, the package cannot be opened, no
///         sheet exists at @p sheet_index, or the sheet XML is empty;
///         otherwise the result of the SAX parse.
bool read_sheet_sax(int sheet_index,
const sax_options& opt,
const sax_cell_callback& on_cell);

private:
QMap<int, int> getMaximalColumnWidth(int firstRow = 1, int lastRow = INT_MAX);

Expand Down
5 changes: 5 additions & 0 deletions QXlsx/header/xlsxdocument_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include <QMap>

#include <memory>

QT_BEGIN_NAMESPACE_XLSX

class DocumentPrivate
Expand All @@ -35,6 +37,9 @@ class DocumentPrivate
std::shared_ptr<Workbook> workbook;
std::shared_ptr<ContentTypes> contentTypes;
bool isLoad;

// Store the entire xlsx (zip) bytes so that even when opened with QIODevice, the zip can be reopened in SAX
std::shared_ptr<QByteArray> package_bytes;
};

QT_END_NAMESPACE_XLSX
Expand Down
40 changes: 40 additions & 0 deletions QXlsx/header/xlsxreadsax.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef XLSXREADSAX_H
#define XLSXREADSAX_H

#include <QXmlStreamReader>
#include <QString>
#include <QVariant>
#include <functional>

namespace QXlsx {

struct sax_options
{
bool resolve_shared_strings = true;
bool read_formulas_as_text = false;
Comment on lines +10 to +14
Copy link

Copilot AI Dec 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sax_options structure is not documented. Public API structures should include documentation comments explaining the purpose and usage of each field.

Suggested change
struct sax_options
{
bool resolve_shared_strings = true;
bool read_formulas_as_text = false;
/**
* @brief Options controlling SAX-based reading of worksheet XML.
*
* All fields have sensible defaults so that callers can use the
* default-constructed instance for common cases and override only
* the options they care about.
*/
struct sax_options
{
/**
* @brief Whether to resolve shared-string references to their text.
*
* When true, cells that reference entries in sharedStrings.xml are
* returned with their corresponding string value. When false, the
* raw shared-string index or reference is preserved instead.
*/
bool resolve_shared_strings = true;
/**
* @brief Whether to read formulas as plain text instead of evaluating.
*
* When true, cell formulas are exposed as their textual representation
* (e.g. "=A1+B1") rather than any cached or computed result value.
*/
bool read_formulas_as_text = false;
/**
* @brief Whether to stop parsing when encountering an empty sheet data.
*
* When true, the reader may stop processing early if the worksheet
* contains an empty sheetData section, which can be used as a simple
* optimization for fully empty sheets.
*/

Copilot uses AI. Check for mistakes.
bool stop_on_empty_sheetdata = false;
};

struct sax_cell
{
QString sheet_name;
int row = 0; // 1-based
int col = 0; // 1-based
Comment on lines +17 to +22
Copy link

Copilot AI Dec 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sax_cell structure is not documented. Public API structures should include documentation comments explaining the purpose and usage of each field.

Suggested change
struct sax_cell
{
QString sheet_name;
int row = 0; // 1-based
int col = 0; // 1-based
/**
* @brief Represents a single cell encountered while reading a sheet with SAX.
*
* Instances of this struct are passed to sax_cell_callback during
* read_sheet_xml_sax() to expose cell coordinates and value to the caller
* without building an in-memory worksheet model.
*/
struct sax_cell
{
/**
* @brief Name of the worksheet that contains this cell.
*
* This is typically the sheet name as it appears in the workbook.
*/
QString sheet_name;
/**
* @brief Row index of the cell (1-based).
*
* A value of 1 refers to the first row in the sheet.
*/
int row = 0; // 1-based
/**
* @brief Column index of the cell (1-based).
*
* A value of 1 refers to the first column in the sheet.
*/
int col = 0; // 1-based
/**
* @brief The value stored in the cell.
*
* The variant typically holds types such as bool, double, or QString,
* depending on the cell's data type and the selected sax_options.
* This is the minimum information required to consume cell contents.
*/

Copilot uses AI. Check for mistakes.
QVariant value; // bool/double/QString etc. (minimum required)
};

// Callback invoked with each parsed cell. Return true to continue with the
// next cell. NOTE(review): returning false presumably stops the read early;
// confirm against the read_sheet_xml_sax() implementation.
using sax_cell_callback = std::function<bool(const sax_cell&)>;

// Forward declaration: ZipReader is only used by reference in this header.
class ZipReader;
// Loads all of sharedStrings.xml from the package (simple implementation).
// Only needed when shared-string resolution is requested.
QStringList load_shared_strings_all(ZipReader& zip);

// Parse sheet.xml with SAX
bool read_sheet_xml_sax(const QByteArray& sheet_xml,
Copy link

Copilot AI Dec 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The read_sheet_xml_sax function does not receive the sheet name as a parameter, making it impossible to populate the sax_cell.sheet_name field. The function signature should include the sheet name so that the callback can provide complete cell information.

Suggested change
bool read_sheet_xml_sax(const QByteArray& sheet_xml,
bool read_sheet_xml_sax(const QByteArray& sheet_xml,
const QString& sheet_name,

Copilot uses AI. Check for mistakes.
const sax_options& opt,
const QStringList* shared_strings, // may be nullptr
Copy link

Copilot AI Dec 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment contains Korean text ("nullptr 가능" meaning "nullptr possible"). Comments in code should use English for consistency and maintainability.

Suggested change
const QStringList* shared_strings, // nullptr 가능
const QStringList* shared_strings, // can be nullptr

Copilot uses AI. Check for mistakes.
const sax_cell_callback& on_cell);

} // namespace QXlsx

#endif // XLSXREADSAX_H
65 changes: 65 additions & 0 deletions QXlsx/source/xlsxdocument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include "xlsxzipreader_p.h"
#include "xlsxzipwriter_p.h"

#include "xlsxreadsax.h"

#include <QBuffer>
#include <QDebug>
#include <QDir>
Expand Down Expand Up @@ -1557,4 +1559,67 @@ bool Document::autosizeColumnWidth()
return erg;
}

/////////////////////////////////////////////////////////////////////
// ======================= SAX streaming API =========================
bool Document::read_sheet_sax(int sheet_index,
const sax_options& opt,
const sax_cell_callback& on_cell)
{
if (!d_ptr || !d_ptr->workbook)
return false;

// Open zip (supports both file path and QIODevice based)
std::unique_ptr<QIODevice> owned_device;

if (!d_ptr->packageName.isEmpty()) {
auto f = std::make_unique<QFile>(d_ptr->packageName);
if (!f->open(QIODevice::ReadOnly))
return false;
owned_device = std::move(f);
} else if (d_ptr->package_bytes && !d_ptr->package_bytes->isEmpty()) {
auto b = std::make_unique<QBuffer>(d_ptr->package_bytes.get());
if (!b->open(QIODevice::ReadOnly))
return false;
owned_device = std::move(b);
} else {
return false;
}

ZipReader zip(owned_device.get());

// shared strings (optional)
QStringList shared_strings;
if (opt.resolve_shared_strings) {
shared_strings = QXlsx::load_shared_strings_all(zip);
}

// sheet XML path: workbook already has filePath (actual path determined by relationship (rels))
AbstractSheet *abs_sheet = d_ptr->workbook->sheet(sheet_index);
if (!abs_sheet)
return false;

const QString sheet_path = abs_sheet->filePath();
const QByteArray sheet_xml = zip.fileData(sheet_path);

if (sheet_xml.isEmpty())
return false;

return QXlsx::read_sheet_xml_sax(sheet_xml, opt,
opt.resolve_shared_strings ? &shared_strings : nullptr,
on_cell);
}

bool Document::read_sheet_sax(const QString& sheet_name,
const sax_options& opt,
const sax_cell_callback& on_cell)
{
const QStringList names = d_ptr->workbook->worksheetNames();
const int idx = names.indexOf(sheet_name);
if (idx < 0)
return false;
return read_sheet_sax(idx, opt, on_cell);
}
//////////////////////////////////////////////////////////////////////


QT_END_NAMESPACE_XLSX
Loading
Loading