Qt-interest Archive, January 2007
SAX progressive parsing for large files
Message 1 in thread
Hi all
I am trying to build a Qt 4 program to parse a large XML file using SAX.
I subclassed QXmlInputSource and reimplemented fetchData() to read the data in chunks,
as described in this post:
http://lists.trolltech.com/qt-interest/2006-08/msg00217.html
I wrote this code, but it only reads the first chunk of data and then ends
without calling any method in MyXmlHandler.
Can anybody tell me what is wrong with this code, and what the
correct way to parse big XML files is? Thanks in advance.
// Asker's driver: wires a SAX reader to a handler, opens /test-01.xml and
// starts an incremental parse over a chunked input source.
// NOTE(review): the input-source class is spelled ChunkedXmlInputSource here
// but ChunkedXMLInputSource in the fetchData() listing further down.
int main(int arg,char** argv){
QXmlSimpleReader reader;
MyXmlHandler handler;
QFile file("/test-01.xml");
// The same handler object receives both content and error callbacks.
reader.setContentHandler(&handler);
reader.setErrorHandler(&handler);
// Gives up (with exit status 0) when the file cannot be opened.
if (!file.open(QFile::ReadOnly | QFile::Text))
return 0;
ChunkedXmlInputSource xmlInputSource(&file);
// The second argument (true) requests incremental parsing. parseContinue()
// is never called in this version, so parsing is only started, not finished.
reader.parse(&xmlInputSource,true);
}
If I change main to call parseContinue() in this way it doesn't work;
I always get an unexpected end of file.
----------------------------------------------
// Start the incremental parse, then keep calling parseContinue() until it
// returns false.
reader.parse(&xmlInputSource,true);
while(reader.parseContinue()){}
------------------------------------------------
----------------fetchData implementation --------------------------
// Asker's fetchData(): intended to hand the parser the next chunk of at most
// MAX_CHUNK_LENGTH bytes read from m_io.
// NOTE(review): the definition is shown without a return type, as posted.
ChunkedXMLInputSource::fetchData()
{
std::cout<<"FechData"<<std::endl;
// At end of device: publish an empty byte array and stop.
if ( m_io->atEnd() ){
std::cout<<"AT END"<<std::endl;
setData(QByteArray());
return;
}
QByteArray data;
// NOTE(review): QByteArray::reserve() only allocates capacity; size() is
// still 0 afterwards, so the array passed to setData() below is empty even
// though bytes are written through data.data(). The version posted in the
// reply uses resize() at this point instead.
data.reserve(MAX_CHUNK_LENGTH);
QDataStream input( m_io );
// The number of bytes actually read (the return value) is not checked.
input.readRawData( data.data(), MAX_CHUNK_LENGTH );
setData(data);
}
Message 2 in thread
Hi,
pepone.onrez wrote:
> i subclass QXmlInputSource and reinplement fetchData for read data in
> chunks
> as comentend in this post
> http://lists.trolltech.com/qt-interest/2006-08/msg00217.html
>
> i write this code but it only read the first chunk of data and end
> with out call any method in MyXmlHandler
>
> Can any body say me what is wrong in this code, and what is the
> correct way for parse big xml files. Thanks in advance
The only thing you still need to do is continually call parseContinue()
until all of the data has been received. See the attached files; I hope
it's useful. I should have made a more complete example in the first
place...
Tim
// us
// us
#include "ChunkedXMLInputSource.h"
// qt
#include <QByteArray>
#include <QDataStream>
// Upper bound, in bytes, on the amount of data handed to the parser per
// fetchData() call (524288 = 512 * 1024).
static const int MAX_CHUNK_LENGTH = 524288;
// Creates an input source that reads from `dev` and immediately loads the
// first chunk. The pointer is only stored, so the device must outlive this
// object.
ChunkedXMLInputSource::ChunkedXMLInputSource( QIODevice* dev )
    : QXmlInputSource(),
      m_io( dev )
{
    // Prime the source so data is already available when parsing starts.
    this->fetchData();
}
// Nothing to release here: the destructor leaves the device behind m_io
// untouched.
ChunkedXMLInputSource::~ChunkedXMLInputSource() {}
/**
 * Hands the next chunk of the underlying device to the XML reader.
 *
 * Reads at most MAX_CHUNK_LENGTH bytes from m_io and publishes them with
 * setData(). When the device is exhausted (or no device was supplied) an
 * empty byte array is published instead.
 *
 * The published array contains exactly the bytes that were read. Earlier
 * revisions always published a MAX_CHUNK_LENGTH-sized buffer and ignored the
 * read count, so a short final chunk carried stale padding bytes into the
 * parser.
 */
void ChunkedXMLInputSource::fetchData()
{
    qDebug() << "ChunkedXMLInputSource::fetchData";

    // No device, or nothing left to read: publish "no data".
    if ( !m_io || m_io->atEnd() )
    {
        setData( QByteArray() );
        return;
    }

    // QIODevice::read() returns only the bytes actually read (an empty array
    // on error), so the chunk never contains bytes that did not come from
    // the device.
    setData( m_io->read( MAX_CHUNK_LENGTH ) );
}
#ifndef CHUNKED_XML_INPUT_SOURCE_H
#define CHUNKED_XML_INPUT_SOURCE_H
// qt
#include <QtXml>
class QFile;
class QTextStream;
class QIODevice;

/**
 * QXmlInputSource that feeds a SAX reader from a QIODevice one chunk at a
 * time instead of loading the whole document at once.
 *
 * Intended for incremental parsing: start with
 * QXmlSimpleReader::parse( source, true ) and keep calling parseContinue();
 * the reader requests further chunks through fetchData().
 */
class ChunkedXMLInputSource : public QXmlInputSource
{
public:
    /// Reads from `dev`. Only the pointer is stored, so the device must stay
    /// valid for as long as this object is in use.
    ChunkedXMLInputSource( QIODevice* dev );
    ~ChunkedXMLInputSource();

    /// Publishes the next chunk of the device via setData(); publishes an
    /// empty byte array once the device is at its end.
    virtual void fetchData();

private:
    // Declared private so that only the QIODevice* constructor can be used
    // from outside the class.
    ChunkedXMLInputSource();
    ChunkedXMLInputSource( QFile& );
    ChunkedXMLInputSource( QTextStream& );

    QIODevice* m_io;  // device the chunks are read from
};
#endif // CHUNKED_XML_INPUT_SOURCE_H
#include <QtXml>
#include <QCoreApplication>
#include <QFile>
#include "ChunkedXMLInputSource.h"
int main( int argc, char* argv[] )
{
QCoreApplication app( argc, argv );
if ( app.arguments().size() < 2 )
{
qWarning() << "usage:" << app.arguments()[0] << "<XML input file>";
return 0;
}
QXmlSimpleReader reader;
reader.setContentHandler( new QXmlDefaultHandler );
QFile f( app.arguments()[1] );
if ( !f.open( QIODevice::ReadOnly ) )
{
qWarning() << "failed to open:" << app.arguments()[1];
return 0;
}
reader.parse( new ChunkedXMLInputSource( &f ), true );
while ( reader.parseContinue() )
{}
return 0;
}