Wiki
Clone wikiOkapi / OkapiStreams
Introduction
Okapi has been updated with preliminary support for streams as input to filters, filter writers and parameters. Some steps have also been updated to accept stream inputs (for example, SegmentationStep). RawDocument is now reusable and is backed by a temporary file if the content is larger than a specified buffer size. A step can pass a RawDocument along and reset it when done, so that subsequent steps can re-read the input stream.
Pseudo Code Extraction
// Inputs from outside InputStream sourceStream; InputStream srxStream; InputStream configStream; String configId; // we need more work so we can output directly to a stream String outputPath; // we will need to reuse the srx stream byte[] srxAsBytes = StreamUtil.inputStreamToBytes(srxStream); // set up filter configuration with custom paramaters FilterConfiguration config = MAPPER.getConfiguration(configId); // stream will be used if parametersLocation is null config.loadParametersFromStream(configStream); config.custom = true; // pipeline IPipeline = pipeline = new Pipeline(); // input document RawDocument rd = new RawDocument(sourceStream, "UTF-8", LocaleId.ENGLISH, LocaleId.SPANISH); rd.setFilterConfigId(configId); // segmentation SegmentationStep segStep = new SegmentationStep(); Parameters p = (Parameters)segStep.getParameters(); p.setSegmentSource(true); p.setSegmentTarget(true); ByteArrayInputStream srcSrx = new ByteArrayInputStream(srxAsBytes); p.setSourceSrxStream(srcSrx); ByteArrayInputStream trgSrx = new ByteArrayInputStream(srxAsBytes); p.setTargetSrxStream(trgSrx); // xliff output XLIFFAndSkeletonWriter writer = new XLIFFAndSkeletonWriter(); FilterEventsWriterStep fwStep = new FilterEventsWriterStep(); fwStep.setFilterWriter(writer); fwStep.setOutputURI(Util.toURI(outputPath)); fwStep.setOutputEncoding("UTF-8"); fwStep.setLastOutputStep(true); fwStep.setTargetLocale(LocaleId.SPANISH); pipeline.addStep(new RawDocumentToFilterEventsStep()); pipeline.addStep(segStep); pipeline.addStep(fewStep); pipeline.execute(rd);
Pseudo Code Merge
// Inputs from outside InputStream xliffStream; InputStream skeletonStream; // optional in case skeleton merge fails String configId; InputStream originalStream; InputStream configStream // pipeline IPipeline = pipeline = new Pipeline(); RawDocument skelRawDoc = new RawDocument(skeletonStream, null, LocaleId.ENGLISH, LocaleId.SPANISH); // set up filter configuration with custom paramaters FilterConfiguration config = MAPPER.getConfiguration(configId); // stream will be used if parametersLocation is null config.loadParametersFromStream(configStream); config.custom = true; RawDocument originalDoc = new RawDocument(originalStream, "UTF-8", LocaleId.ENGLISH, LocaleId.SPANISH); originalDoc.setFilterConfigId(configId); RawDocument xliffDoc = new RawDocument(xliffStream, "UTF-8", LocaleId.ENGLISH, LocaleId.SPANISH); // combined merger does skeleton merge, if it fails traditional merge CombinedXliffMergerStep merger = new CombinedXliffMergerStep() merger.setSecondInput(skelDoc); merger.setThirdInput(originalDoc); RawDocumentToOutputStreamStep outputStep = new RawDocumentToOutputStreamStep(); ByteArrayOutputStream outputBytes = new ByteArrayOutputStream(); // merged file will now be written to memory in outputBytes outputStep.setOutputStream(outputBytes); pipeline.addStep(merger); pipeline.addStep(outputStep); pipeline.execute(xliffDoc);
Updated