Wiki

Clone wiki

Okapi / OkapiStreams

Introduction

Okapi has been updated with preliminary support for streams as input to filters, filter writers and parameters. Some steps have also been updated to allow stream inputs (SegmentationStep). RawDocument is now reusable and is backed by a temp file if the content is larger than a specified buffer size. Steps can pass along a RawDocument and reset it when done so that subsequent steps can re-read the input stream.

Pseudo Code Extraction

// Extraction: read a source document from a stream, segment it, and write
// the result through an XLIFFAndSkeletonWriter to outputPath.

// Inputs from outside
InputStream sourceStream;
InputStream srxStream;
InputStream configStream;
String      configId;

// we need more work so we can output directly to a stream
String      outputPath;

// we will need to reuse the srx stream, so buffer it once as bytes and
// hand out a fresh ByteArrayInputStream for each consumer below
byte[] srxAsBytes = StreamUtil.inputStreamToBytes(srxStream);

// set up filter configuration with custom parameters
FilterConfiguration config = MAPPER.getConfiguration(configId);
// stream will be used if parametersLocation is null
config.loadParametersFromStream(configStream);
config.custom = true;

// pipeline
IPipeline pipeline = new Pipeline();

// input document (UTF-8, English source, Spanish target)
RawDocument rd = new RawDocument(sourceStream, "UTF-8", LocaleId.ENGLISH, LocaleId.SPANISH);
rd.setFilterConfigId(configId);

// segmentation: segment both source and target, each with its own
// independent stream over the same SRX bytes
SegmentationStep segStep = new SegmentationStep();
Parameters p = (Parameters)segStep.getParameters();
p.setSegmentSource(true);
p.setSegmentTarget(true);
ByteArrayInputStream srcSrx = new ByteArrayInputStream(srxAsBytes);
p.setSourceSrxStream(srcSrx);
ByteArrayInputStream trgSrx = new ByteArrayInputStream(srxAsBytes);
p.setTargetSrxStream(trgSrx);

// xliff output
XLIFFAndSkeletonWriter writer = new XLIFFAndSkeletonWriter();
FilterEventsWriterStep fwStep = new FilterEventsWriterStep();
fwStep.setFilterWriter(writer);
fwStep.setOutputURI(Util.toURI(outputPath));
fwStep.setOutputEncoding("UTF-8");
fwStep.setLastOutputStep(true);
fwStep.setTargetLocale(LocaleId.SPANISH);

// assemble the steps in execution order: filter, segment, write
pipeline.addStep(new RawDocumentToFilterEventsStep());
pipeline.addStep(segStep);
pipeline.addStep(fwStep);

pipeline.execute(rd);

Pseudo Code Merge

// Merge: combine a translated XLIFF stream with its skeleton (and, as a
// fallback, the original document) and write the merged file to memory.

// Inputs from outside
InputStream xliffStream;
InputStream skeletonStream;

// optional in case skeleton merge fails
String      configId;
InputStream originalStream;
InputStream configStream;

// pipeline
IPipeline pipeline = new Pipeline();

// NOTE(review): the encoding argument is null here, unlike the other
// documents below which pass "UTF-8" - confirm this is intended
RawDocument skelRawDoc = new RawDocument(skeletonStream, null, LocaleId.ENGLISH, LocaleId.SPANISH);

// set up filter configuration with custom parameters
FilterConfiguration config = MAPPER.getConfiguration(configId);
// stream will be used if parametersLocation is null
config.loadParametersFromStream(configStream);
config.custom = true;

RawDocument originalDoc = new RawDocument(originalStream, "UTF-8", LocaleId.ENGLISH, LocaleId.SPANISH);
originalDoc.setFilterConfigId(configId);

RawDocument xliffDoc = new RawDocument(xliffStream, "UTF-8", LocaleId.ENGLISH, LocaleId.SPANISH);

// the combined merger attempts a skeleton merge first; if that fails it
// falls back to a traditional merge
CombinedXliffMergerStep merger = new CombinedXliffMergerStep();
merger.setSecondInput(skelRawDoc);
merger.setThirdInput(originalDoc);

RawDocumentToOutputStreamStep outputStep = new RawDocumentToOutputStreamStep();
ByteArrayOutputStream outputBytes = new ByteArrayOutputStream();

// merged file will now be written to memory in outputBytes
outputStep.setOutputStream(outputBytes);

pipeline.addStep(merger);
pipeline.addStep(outputStep);

// the XLIFF document is the primary pipeline input; the skeleton and the
// original document were supplied to the merger above
pipeline.execute(xliffDoc);

Updated