Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 18 additions & 31 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@
"@types/eslint": "^8.4.10",
"@types/jest": "^29.2.5",
"@types/jquery": "^3.5.14",
"@types/jszip": "3.1.7",
"@types/lodash": "^4.14.182",
"@types/lodash.clamp": "^4.0.6",
"@types/lodash.debounce": "^4.0.6",
Expand Down
6 changes: 1 addition & 5 deletions packages/code-studio/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"@fortawesome/react-fontawesome": "^0.2.0",
"classnames": "^2.3.1",
"event-target-shim": "^6.0.2",
"jszip": "3.2.2",
"jszip": "3.10.1",
"lodash.debounce": "^4.0.8",
"lodash.throttle": "^4.1.1",
"memoize-one": "^5.1.1",
Expand All @@ -58,10 +58,6 @@
"redux-thunk": "^2.4.1",
"shortid": "^2.2.16"
},
"dependenciesComments": {
"@types/jszip": "3.1.7 is the closest to 3.2.2 available. JSZip adds official typings in 3.4.0, so remove if going past JSZip 3.4.0",
"jszip": "Pinned to 3.2.2 b/c 3.3.0+ breaks nodestream usage. Not fixed as of 3.5.0. https://github.com/Stuk/jszip/issues/663"
},
"homepage": ".",
"main": "build/index.html",
"files": [
Expand Down
2 changes: 1 addition & 1 deletion packages/console/src/csv/CsvInputBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class CsvInputBar extends Component<CsvInputBarProps, CsvInputBarState> {
}
}

handleFile(file: Blob | JSZipObject, isZip = false): void {
handleFile(file: File | Blob | JSZipObject, isZip = false): void {
log.info(
`Starting CSV parser for ${
file instanceof File ? file.name : 'pasted values'
Expand Down
41 changes: 28 additions & 13 deletions packages/console/src/csv/CsvParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { IdeSession, Table } from '@deephaven/jsapi-types';
import type { JSZipObject } from 'jszip';
import CsvTypeParser from './CsvTypeParser';
import { CsvTypes } from './CsvFormats';
import makeZipStreamHelper from './ZipStreamHelper';

const log = Log.module('CsvParser');

Expand All @@ -15,7 +16,7 @@ const ZIP_CONSOLIDATE_CHUNKS = 650;
interface CsvParserConstructor {
onFileCompleted: (tables: Table[]) => void;
session: IdeSession;
file: Blob | JSZipObject;
file: File | Blob | JSZipObject;
type: CsvTypes;
readHeaders: boolean;
onProgress: (progressValue: number) => boolean;
Expand Down Expand Up @@ -70,6 +71,8 @@ class CsvParser {
this.onProgress = onProgress;
this.onError = onError;
this.tables = [];
this.rowCount = 0;
this.rowsProcessed = 0;
this.chunks = 0;
this.totalChunks = isZip
? 0
Expand Down Expand Up @@ -102,7 +105,7 @@ class CsvParser {

session: IdeSession;

file: Blob | JSZipObject;
file: File | Blob | JSZipObject;

isZip: boolean;

Expand All @@ -122,6 +125,10 @@ class CsvParser {

types?: string[];

rowCount: number;

rowsProcessed: number;

chunks: number;

totalChunks: number;
Expand Down Expand Up @@ -167,17 +174,20 @@ class CsvParser {
}

parse(): void {
const handleParseDone = (types: string[]) => {
const toParse = this.isZip
? (this.file as JSZipObject).nodeStream(
// JsZip types are incorrect, thus the funny casting
// Actual parameter is 'nodebuffer'
'nodebuffer' as 'nodestream',
this.handleNodeUpdate
)
: (this.file as Blob);
const handleParseDone = (types: string[], rowCount: number) => {
this.types = types;
Papa.parse(toParse, this.config);
this.rowCount = rowCount;

if (this.file instanceof File || this.file instanceof Blob) {
Papa.parse(this.file, this.config);
} else {
const zipStream = makeZipStreamHelper(this.file, this.handleNodeUpdate);
// This is actually a stream, but papaparse TS doesn't like it
Papa.parse(zipStream as unknown as Blob, this.config);
// The stream needs to be manually resumed since jszip starts paused
// Papaparse does not call resume and assumes the stream is already reading
zipStream.resume();
}
};
const typeParser = new CsvTypeParser(
handleParseDone,
Expand Down Expand Up @@ -274,6 +284,9 @@ class CsvParser {
}
assertNotNull(this.headers);
assertNotNull(types);

this.rowsProcessed += columns[0].length;

session
.newTable(this.headers, types, columns, this.timeZone)
.then(table => {
Expand All @@ -294,7 +307,9 @@ class CsvParser {
if (totalChunks > 0) {
progress = Math.round((tables.length / totalChunks) * 50) + 50;
} else {
progress = Math.round(50 + this.zipProgress / 2);
// The zip file can be read entirely while in the middle of parsing
// Since we know the number of rows from the type parsing, use that for progress
progress = Math.round((this.rowsProcessed / this.rowCount) * 50) + 50;
}
log.debug2(`CSV parser progress ${progress}`);
onProgress(progress);
Expand Down
36 changes: 22 additions & 14 deletions packages/console/src/csv/CsvTypeParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import Papa, { Parser, ParseResult, ParseLocalConfig } from 'papaparse';
// Intentionally using isNaN rather than Number.isNaN
/* eslint-disable no-restricted-globals */
import NewTableColumnTypes from './NewTableColumnTypes';
import makeZipStreamHelper from './ZipStreamHelper';

// Initially column types start as unknown
const UNKNOWN = 'unknown';
Expand Down Expand Up @@ -156,8 +157,8 @@ class CsvTypeParser {
}

constructor(
onFileCompleted: (types: string[]) => void,
file: Blob | JSZipObject,
onFileCompleted: (types: string[], rowCount: number) => void,
file: File | Blob | JSZipObject,
readHeaders: boolean,
parentConfig: ParseLocalConfig<unknown, Blob | NodeJS.ReadableStream>,
nullString: string | null,
Expand All @@ -175,6 +176,7 @@ class CsvTypeParser {
this.onError = onError;
this.chunks = 0;
this.totalChunks = totalChunks;
this.rowCount = 0;
this.isZip = isZip;
this.shouldTrim = shouldTrim;
this.zipProgress = 0;
Expand All @@ -192,9 +194,9 @@ class CsvTypeParser {
};
}

onFileCompleted: (types: string[]) => void;
onFileCompleted: (types: string[], rowCount: number) => void;

file: Blob | JSZipObject;
file: File | Blob | JSZipObject;

readHeaders: boolean;

Expand All @@ -210,6 +212,8 @@ class CsvTypeParser {

totalChunks: number;

rowCount: number;

isZip: boolean;

shouldTrim: boolean;
Expand All @@ -219,15 +223,16 @@ class CsvTypeParser {
config: ParseLocalConfig<unknown, Blob | NodeJS.ReadableStream>;

parse(): void {
const toParse = this.isZip
? (this.file as JSZipObject).nodeStream(
// JsZip types are incorrect, thus the funny casting
// Actual parameter is 'nodebuffer'
'nodebuffer' as 'nodestream',
this.handleNodeUpdate
)
: (this.file as Blob);
Papa.parse(toParse, this.config);
if (this.file instanceof File || this.file instanceof Blob) {
Papa.parse(this.file, this.config);
} else {
const zipStream = makeZipStreamHelper(this.file, this.handleNodeUpdate);
// This is actually a stream, but papaparse TS doesn't like it
Papa.parse(zipStream as unknown as Blob, this.config);
// The stream needs to be manually resumed since jszip starts paused
// Papaparse does not call resume and assumes the stream is already reading
zipStream.resume();
}
}

handleChunk(result: ParseResult<string[]>, parser: Parser): void {
Expand All @@ -245,6 +250,8 @@ class CsvTypeParser {
}
}

this.rowCount += data.length;

assertNotNull(this.types);

const cloneTypes = [...this.types];
Expand Down Expand Up @@ -294,7 +301,8 @@ class CsvTypeParser {
type === UNKNOWN || type === NewTableColumnTypes.LOCAL_TIME
? NewTableColumnTypes.STRING
: type
)
),
this.rowCount
);
}
}
Expand Down
33 changes: 33 additions & 0 deletions packages/console/src/csv/ZipStreamHelper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import type { JSZipObject, OnUpdateCallback, JSZipStreamHelper } from 'jszip';

/**
* This is used to help papaparse understand our stream.
* It uses these fields for feature detection, but never actually calls read()
* https://github.com/mholt/PapaParse/blob/master/papaparse.js#L244
*/
interface ZipStreamHelper extends JSZipStreamHelper<string> {
readable: boolean;
read(): void;
removeListener(): void;
}

export default function makeZipStreamHelper(
zipObj: JSZipObject,
onUpdate: OnUpdateCallback
) {
const helper: ZipStreamHelper = (
zipObj as JSZipObject & {
// The type could be anything except nodebuffer from https://stuk.github.io/jszip/documentation/api_zipobject/internal_stream.html
// We only need it as a string though
// JSZip types don't include this method for some reason
internalStream(type: 'string'): JSZipStreamHelper<string>;
}
).internalStream('string') as ZipStreamHelper;

helper.readable = true;
helper.read = () => false;
helper.removeListener = () => false;
helper.on('data', (_, metadata) => onUpdate(metadata));

return helper;
}