
Commit 8f198c5

Support parquet file format. (#100)
1 parent 0ceb7db commit 8f198c5

4 files changed

Lines changed: 107 additions & 1 deletion


handwritten/bigquery/samples/system-test/tables.test.js

Lines changed: 20 additions & 0 deletions
@@ -151,6 +151,26 @@ test.serial(`should extract a table to GCS`, async t => {
     .start();
 });
 
+test(`should load a GCS Parquet file with explicit schema`, async t => {
+  t.plan(1);
+  const tableId = generateUuid();
+
+  const output = await tools.runAsync(
+    `${cmd} load-gcs-parquet ${projectId} ${datasetId} ${tableId}`,
+    cwd
+  );
+  t.regex(output, /completed\./);
+  await tools
+    .tryTest(async assert => {
+      const [rows] = await bigquery
+        .dataset(datasetId)
+        .table(tableId)
+        .getRows();
+      assert(rows.length > 0);
+    })
+    .start();
+});
+
 test(`should load a GCS CSV file with explicit schema`, async t => {
   t.plan(1);
   const tableId = generateUuid();
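
For context, the assertion at the end of the new test boils down to a plain getRows() call against the freshly loaded table. A minimal sketch of that check outside the test harness, with placeholder project, dataset, and table IDs:

const BigQuery = require('@google-cloud/bigquery');
const bigquery = new BigQuery({projectId: 'your-project-id'});

bigquery
  .dataset('my_dataset')
  .table('my_table')
  .getRows()
  .then(([rows]) => {
    // The system test asserts that at least one row was loaded.
    console.log(`Loaded ${rows.length} rows.`);
  })
  .catch(err => console.error('ERROR:', err));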

handwritten/bigquery/samples/tables.js

Lines changed: 75 additions & 0 deletions
@@ -240,6 +240,73 @@ function loadLocalFile(datasetId, tableId, filename, projectId) {
   // [END bigquery_load_from_file]
 }
 
+function loadParquetFromGCS(datasetId, tableId, projectId) {
+  // [START bigquery_load_table_gcs_parquet]
+  // Imports the Google Cloud client libraries
+  const BigQuery = require('@google-cloud/bigquery');
+  const Storage = require('@google-cloud/storage');
+
+  /**
+   * TODO(developer): Uncomment the following lines before running the sample.
+   */
+  // const projectId = "your-project-id";
+  // const datasetId = "my_dataset";
+  // const tableId = "my_table";
+
+  /**
+   * This sample loads the Parquet file at
+   * https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.parquet
+   *
+   * TODO(developer): Replace the following lines with the path to your file.
+   */
+  const bucketName = 'cloud-samples-data';
+  const filename = 'bigquery/us-states/us-states.parquet';
+
+  // Instantiates clients
+  const bigquery = new BigQuery({
+    projectId: projectId,
+  });
+
+  const storage = new Storage({
+    projectId: projectId,
+  });
+
+  // Configure the load job. For full list of options, see:
+  // https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load
+  const metadata = {
+    sourceFormat: 'PARQUET',
+    schema: {
+      fields: [
+        {name: 'name', type: 'STRING'},
+        {name: 'post_abbr', type: 'STRING'},
+      ],
+    },
+  };
+
+  // Loads data from a Google Cloud Storage file into the table
+  bigquery
+    .dataset(datasetId)
+    .table(tableId)
+    .load(storage.bucket(bucketName).file(filename), metadata)
+    .then(results => {
+      const job = results[0];
+
+      // load() waits for the job to finish
+      assert.equal(job.status.state, 'DONE');
+      console.log(`Job ${job.id} completed.`);
+
+      // Check the job's status for errors
+      const errors = job.status.errors;
+      if (errors && errors.length > 0) {
+        throw errors;
+      }
+    })
+    .catch(err => {
+      console.error('ERROR:', err);
+    });
+  // [END bigquery_load_table_gcs_parquet]
+}
+
 function loadCSVFromGCS(datasetId, tableId, projectId) {
   // [START bigquery_load_table_gcs_csv]
   // Imports the Google Cloud client libraries
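
The sample above follows the promise-then style used throughout tables.js. As a minimal sketch only (not part of this commit), the same load can be written with async/await; note that the sample's assert.equal call presumes an assert module is in scope elsewhere in tables.js, whereas this version checks the job status directly:

const BigQuery = require('@google-cloud/bigquery');
const Storage = require('@google-cloud/storage');

async function loadParquet(projectId, datasetId, tableId) {
  const bigquery = new BigQuery({projectId});
  const storage = new Storage({projectId});
  const file = storage
    .bucket('cloud-samples-data')
    .file('bigquery/us-states/us-states.parquet');

  const metadata = {
    sourceFormat: 'PARQUET',
    schema: {
      fields: [
        {name: 'name', type: 'STRING'},
        {name: 'post_abbr', type: 'STRING'},
      ],
    },
  };

  // load() resolves once the job has finished.
  const [job] = await bigquery
    .dataset(datasetId)
    .table(tableId)
    .load(file, metadata);

  // Surface any job-level errors rather than asserting on job state.
  if (job.status.errors && job.status.errors.length > 0) {
    throw job.status.errors;
  }
  console.log(`Job ${job.id} completed.`);
}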
@@ -667,6 +734,14 @@ require(`yargs`)
      );
    }
  )
+  .command(
+    `load-gcs-parquet <projectId> <datasetId> <tableId>`,
+    `Loads sample Parquet data from a Google Cloud Storage file into a table.`,
+    {},
+    opts => {
+      loadParquetFromGCS(opts.datasetId, opts.tableId, opts.projectId);
+    }
+  )
  .command(
    `load-gcs-csv <projectId> <datasetId> <tableId>`,
    `Loads sample CSV data from a Google Cloud Storage file into a table.`,
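
With the subcommand registered, the system test's invocation corresponds to running tables.js directly. A rough sketch of that invocation from Node, assuming tables.js is the CLI entry point in the current directory and using placeholder IDs:

const {execSync} = require('child_process');

// Mirrors the command string the system test passes to tools.runAsync.
const output = execSync(
  'node tables.js load-gcs-parquet your-project-id my_dataset my_table'
).toString();

// On success the sample logs `Job <id> completed.`, which is what the
// system test's t.regex(output, /completed\./) assertion matches.
console.log(output);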

handwritten/bigquery/src/table.js

Lines changed: 2 additions & 1 deletion
@@ -38,6 +38,7 @@ var FORMATS = {
   avro: 'AVRO',
   csv: 'CSV',
   json: 'NEWLINE_DELIMITED_JSON',
+  parquet: 'PARQUET',
 };
 
 /**
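
The FORMATS map translates the lower-case format names accepted in the library's options into the upper-case enum values the BigQuery API expects. A sketch of how that lookup presumably behaves (not the library's exact code; resolveFormat is a hypothetical helper):

var FORMATS = {
  avro: 'AVRO',
  csv: 'CSV',
  json: 'NEWLINE_DELIMITED_JSON',
  parquet: 'PARQUET',
};

// e.g. resolveFormat({format: 'parquet'}) === 'PARQUET', matching the
// destinationFormat the unit test below asserts on. The 'csv' fallback
// reflects the documented Default: "CSV".
function resolveFormat(options) {
  var format = (options.format || 'csv').toLowerCase();
  return FORMATS[format];
}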
@@ -763,7 +764,7 @@ Table.prototype.createCopyFromJob = function(sourceTables, metadata, callback) {
  * to. A string or a {@link https://cloud.google.com/nodejs/docs/reference/storage/latest/File File} object.
  * @param {object=} options - The configuration object.
  * @param {string} options.format - The format to export the data in. Allowed
- *     options are "CSV", "JSON", or "AVRO". Default: "CSV".
+ *     options are "CSV", "JSON", "AVRO", or "PARQUET". Default: "CSV".
  * @param {boolean} options.gzip - Specify if you would like the file compressed
  *     with GZIP. Default: false.
  * @param {string} [options.jobId] Custom job id.

handwritten/bigquery/test/table.js

Lines changed: 10 additions & 0 deletions
@@ -855,6 +855,16 @@ describe('BigQuery/Table', function() {
 
       table.createExtractJob(FILE, {format: 'avro'}, assert.ifError);
     });
+
+    it('should accept parquet', function(done) {
+      table.bigQuery.createJob = function(reqOpts) {
+        var extract = reqOpts.configuration.extract;
+        assert.equal(extract.destinationFormat, 'PARQUET');
+        done();
+      };
+
+      table.createExtractJob(FILE, {format: 'parquet'}, assert.ifError);
+    });
   });
 
   it('should parse out full gs:// urls from files', function(done) {
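
From a caller's perspective, the change means createExtractJob now accepts {format: 'parquet'} the same way it accepts the other formats. A brief sketch with hypothetical bucket, file, and variable names:

// `table` is a Table instance and `storage` a Storage client; the
// destination bucket and filename here are placeholders.
table.createExtractJob(
  storage.bucket('my-bucket').file('us-states.parquet'),
  {format: 'parquet'},
  (err, job) => {
    if (err) throw err;
    console.log(`Started extract job ${job.id}`);
  }
);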
