Skip to content

Commit cd3367d

Browse files
JavaScript: Add ProjectParser for whole-project parsing (#6458)
- Add `ProjectParser` class that handles file discovery, classification, and parsing with Prettier config detection
- Add `ParseProject` RPC method (Java client + TypeScript handler) to parse entire project directories
- Move Prettier detection from `JavaScriptParser` to project-level (`ProjectParser` adds `PrettierStyle` markers)
- Refactor CLI utilities to use `ProjectParser` with new `fileFilter` option for selective parsing
- Add `DiscoveredFiles` interface with categorized file lists (jsFiles, lockFiles, jsonFiles, etc.)
1 parent c5608cf commit cd3367d

38 files changed

Lines changed: 2570 additions & 377 deletions

rewrite-core/src/main/java/org/openrewrite/marker/Markers.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,7 @@ public <M extends Marker> Optional<M> findFirst(Class<M> markerType) {
174174
@Override
175175
public void rpcSend(Markers after, RpcSendQueue q) {
176176
q.getAndSend(this, Markers::getId);
177-
q.getAndSendList(this, Markers::getMarkers, Marker::getId, null);
177+
q.getAndSendListAsRef(this, Markers::getMarkers, Marker::getId, null);
178178
}
179179

180180
@Override

rewrite-javascript/rewrite/src/cli/cli-utils.ts

Lines changed: 46 additions & 237 deletions
Original file line number | Diff line number | Diff line change
@@ -14,22 +14,11 @@
1414
* limitations under the License.
1515
*/
1616
import * as fs from 'fs';
17-
import * as fsp from 'fs/promises';
1817
import * as path from 'path';
19-
import {spawn, spawnSync} from 'child_process';
18+
import {spawn} from 'child_process';
2019
import {Recipe, RecipeRegistry} from '../recipe';
2120
import {SourceFile} from '../tree';
22-
import {
23-
isYarnBerryLockFile,
24-
JavaScriptParser,
25-
JSON_LOCK_FILE_NAMES,
26-
PackageJsonParser,
27-
TEXT_LOCK_FILE_NAMES,
28-
YAML_LOCK_FILE_NAMES
29-
} from '../javascript';
30-
import {JsonParser} from '../json';
31-
import {PlainTextParser} from '../text';
32-
import {YamlParser} from '../yaml';
21+
import {ProjectParser} from '../javascript/project-parser';
3322

3423
// ANSI color codes
3524
const colors = {
@@ -339,123 +328,28 @@ export function findRecipe(
339328
}
340329

341330
/**
342-
* Discover source files in a project directory, respecting .gitignore
331+
* Discover source files in a project directory, respecting .gitignore.
332+
* Delegates to ProjectParser for file discovery.
343333
*/
344334
export async function discoverFiles(projectRoot: string, verbose: boolean = false): Promise<string[]> {
345-
const files: string[] = [];
346-
347-
if (verbose) {
348-
console.log(`Discovering files in ${projectRoot}...`);
349-
}
350-
351-
// Get list of git-ignored files
352-
const ignoredFiles = new Set<string>();
353-
try {
354-
const result = spawnSync('git', ['ls-files', '--ignored', '--exclude-standard', '-o'], {
355-
cwd: projectRoot,
356-
encoding: 'utf8'
357-
});
358-
if (result.stdout) {
359-
for (const line of result.stdout.split('\n')) {
360-
if (line.trim()) {
361-
ignoredFiles.add(path.join(projectRoot, line.trim()));
362-
}
363-
}
364-
}
365-
} catch {
366-
// Git not available or not a git repository
367-
}
368-
369-
// Get tracked and untracked (but not ignored) files
370-
const trackedFiles = new Set<string>();
371-
try {
372-
// Get tracked files
373-
const tracked = spawnSync('git', ['ls-files'], {
374-
cwd: projectRoot,
375-
encoding: 'utf8'
376-
});
377-
// Check if git command failed (not a git repository)
378-
if (tracked.status !== 0 || tracked.error) {
379-
// Not a git repository, fall back to recursive directory scan
380-
await walkDirectory(projectRoot, files, ignoredFiles, projectRoot);
381-
return files.filter(isAcceptedFile);
382-
}
383-
if (tracked.stdout) {
384-
for (const line of tracked.stdout.split('\n')) {
385-
if (line.trim()) {
386-
trackedFiles.add(path.join(projectRoot, line.trim()));
387-
}
388-
}
389-
}
390-
391-
// Get untracked but not ignored files
392-
const untracked = spawnSync('git', ['ls-files', '--others', '--exclude-standard'], {
393-
cwd: projectRoot,
394-
encoding: 'utf8'
395-
});
396-
if (untracked.stdout) {
397-
for (const line of untracked.stdout.split('\n')) {
398-
if (line.trim()) {
399-
trackedFiles.add(path.join(projectRoot, line.trim()));
400-
}
401-
}
402-
}
403-
} catch {
404-
// Not a git repository, fall back to recursive directory scan
405-
await walkDirectory(projectRoot, files, ignoredFiles, projectRoot);
406-
return files.filter(isAcceptedFile);
407-
}
408-
409-
// Filter to accepted file types that exist on disk
410-
// (git ls-files returns deleted files that are still tracked)
411-
for (const file of trackedFiles) {
412-
if (!ignoredFiles.has(file) && isAcceptedFile(file) && fs.existsSync(file)) {
413-
files.push(file);
414-
}
415-
}
416-
417-
return files;
418-
}
419-
420-
/**
421-
* Walk a directory recursively, collecting files
422-
*/
423-
export async function walkDirectory(
424-
dir: string,
425-
files: string[],
426-
ignored: Set<string>,
427-
projectRoot: string
428-
): Promise<void> {
429-
const entries = await fsp.readdir(dir, {withFileTypes: true});
430-
431-
for (const entry of entries) {
432-
const fullPath = path.join(dir, entry.name);
433-
434-
// Skip hidden files and common ignore patterns
435-
if (entry.name.startsWith('.') || entry.name === 'node_modules' || entry.name === 'dist' ||
436-
entry.name === 'build' || entry.name === 'coverage') {
437-
continue;
438-
}
439-
440-
if (ignored.has(fullPath)) {
441-
continue;
442-
}
443-
444-
if (entry.isDirectory()) {
445-
await walkDirectory(fullPath, files, ignored, projectRoot);
446-
} else if (entry.isFile() && isAcceptedFile(fullPath)) {
447-
files.push(fullPath);
448-
}
449-
}
335+
const parser = new ProjectParser(projectRoot, {verbose});
336+
const discovered = await parser.discoverFiles();
337+
338+
// Flatten all discovered files into a single array
339+
return [
340+
...discovered.packageJsonFiles,
341+
...discovered.lockFiles.json,
342+
...discovered.lockFiles.yaml,
343+
...discovered.lockFiles.text,
344+
...discovered.jsFiles,
345+
...discovered.jsonFiles,
346+
...discovered.yamlFiles,
347+
...discovered.textFiles
348+
];
450349
}
451350

452351
/**
453-
* All lock file names (typed as string[] for easier comparison)
454-
*/
455-
const ALL_LOCK_FILE_NAMES: readonly string[] = [...JSON_LOCK_FILE_NAMES, ...YAML_LOCK_FILE_NAMES, ...TEXT_LOCK_FILE_NAMES];
456-
457-
/**
458-
* Check if a file is accepted for parsing based on its extension
352+
* Check if a file is accepted for parsing based on its extension.
459353
*/
460354
export function isAcceptedFile(filePath: string): boolean {
461355
const ext = path.extname(filePath).toLowerCase();
@@ -466,7 +360,7 @@ export function isAcceptedFile(filePath: string): boolean {
466360
return true;
467361
}
468362

469-
// JSON files (including package.json which gets special parsing)
363+
// JSON files
470364
if (ext === '.json') {
471365
return true;
472366
}
@@ -476,8 +370,13 @@ export function isAcceptedFile(filePath: string): boolean {
476370
return true;
477371
}
478372

479-
// Lock files (some have non-standard extensions like yarn.lock)
480-
if (ALL_LOCK_FILE_NAMES.includes(basename)) {
373+
// Lock files, matched by basename (yarn.lock and bun.lock use the non-standard .lock extension)
374+
if (['yarn.lock', 'pnpm-lock.yaml', 'package-lock.json', 'bun.lock'].includes(basename)) {
375+
return true;
376+
}
377+
378+
// Text config files
379+
if (['.prettierignore', '.gitignore', '.npmignore', '.eslintignore'].includes(basename)) {
481380
return true;
482381
}
483382

@@ -491,127 +390,37 @@ export interface ParseFilesOptions {
491390
onProgress?: ProgressCallback;
492391
}
493392

494-
/**
495-
* Internal context for file parsing progress tracking.
496-
*/
497-
interface ParseContext {
498-
current: number;
499-
total: number;
500-
verbose: boolean;
501-
onProgress?: ProgressCallback;
502-
}
503-
504-
/**
505-
* Helper to parse files with a given parser, handling verbose logging and progress.
506-
*/
507-
async function* parseWithParser(
508-
files: string[],
509-
parser: { parse(...files: string[]): AsyncGenerator<SourceFile> },
510-
fileType: string,
511-
ctx: ParseContext
512-
): AsyncGenerator<SourceFile, ParseContext> {
513-
if (files.length === 0) {
514-
return ctx;
515-
}
516-
517-
if (ctx.verbose) {
518-
console.log(`Parsing ${files.length} ${fileType} files...`);
519-
}
520-
521-
for await (const sf of parser.parse(...files)) {
522-
ctx.current++;
523-
ctx.onProgress?.(ctx.current, ctx.total, sf.sourcePath);
524-
yield sf;
525-
}
526-
527-
return ctx;
528-
}
529-
530-
/**
531-
* Classifies a yarn.lock file as YAML (Berry) or text (Classic) based on its content.
532-
* Returns 'yaml' for Yarn Berry (v2+) and 'text' for Yarn Classic (v1).
533-
*/
534-
async function classifyYarnLockFile(filePath: string): Promise<'yaml' | 'text'> {
535-
try {
536-
const content = await fsp.readFile(filePath, 'utf-8');
537-
return isYarnBerryLockFile(content) ? 'yaml' : 'text';
538-
} catch {
539-
// Default to text format if we can't read the file
540-
return 'text';
541-
}
542-
}
543-
544393
/**
545394
* Parse source files using appropriate parsers (streaming version).
546395
* Yields source files as they are parsed, allowing immediate processing.
396+
*
397+
* Uses ProjectParser with a file filter to parse only the specified files.
398+
* This handles Prettier detection, file classification, and appropriate parser selection.
547399
*/
548400
export async function* parseFilesStreaming(
549401
filePaths: string[],
550402
projectRoot: string,
551403
options: ParseFilesOptions = {}
552404
): AsyncGenerator<SourceFile, void, undefined> {
553-
const { verbose = false, onProgress } = options;
554-
const total = filePaths.length;
555-
let current = 0;
405+
const {verbose = false, onProgress} = options;
556406

557-
// Group files by type
558-
const jsFiles: string[] = [];
559-
const packageJsonFiles: string[] = [];
560-
const jsonFiles: string[] = [];
561-
const jsonLockFiles: string[] = [];
562-
const yamlLockFiles: string[] = [];
563-
const yamlFiles: string[] = [];
564-
const textLockFiles: string[] = [];
565-
566-
// Collect yarn.lock files for content-based classification
567-
const yarnLockFiles: string[] = [];
568-
569-
for (const filePath of filePaths) {
570-
const basename = path.basename(filePath);
571-
const ext = path.extname(filePath).toLowerCase();
572-
573-
if (basename === 'package.json') {
574-
packageJsonFiles.push(filePath);
575-
} else if (['.js', '.jsx', '.ts', '.tsx', '.mjs', '.mts', '.cjs', '.cts'].includes(ext)) {
576-
jsFiles.push(filePath);
577-
} else if ((JSON_LOCK_FILE_NAMES as readonly string[]).includes(basename)) {
578-
jsonLockFiles.push(filePath);
579-
} else if ((YAML_LOCK_FILE_NAMES as readonly string[]).includes(basename)) {
580-
yamlLockFiles.push(filePath);
581-
} else if (basename === 'yarn.lock') {
582-
// yarn.lock needs content-based classification
583-
yarnLockFiles.push(filePath);
584-
} else if ((TEXT_LOCK_FILE_NAMES as readonly string[]).includes(basename)) {
585-
// Other text lock files (if any besides yarn.lock)
586-
textLockFiles.push(filePath);
587-
} else if (ext === '.json') {
588-
jsonFiles.push(filePath);
589-
} else if (['.yaml', '.yml'].includes(ext)) {
590-
yamlFiles.push(filePath);
591-
}
592-
}
407+
// Create a set for fast lookup
408+
const fileSet = new Set(filePaths.map(f => path.resolve(f)));
409+
let current = 0;
410+
const total = filePaths.length;
593411

594-
// Classify yarn.lock files by content (Yarn Berry uses YAML, Classic uses text)
595-
for (const yarnLockPath of yarnLockFiles) {
596-
const format = await classifyYarnLockFile(yarnLockPath);
597-
if (format === 'yaml') {
598-
yamlLockFiles.push(yarnLockPath);
599-
} else {
600-
textLockFiles.push(yarnLockPath);
601-
}
602-
}
412+
const parser = new ProjectParser(projectRoot, {
413+
verbose,
414+
fileFilter: (absolutePath) => fileSet.has(absolutePath),
415+
onProgress: onProgress ? (phase, cur, tot, filePath) => {
416+
if (phase === "parsing" && filePath) {
417+
current++;
418+
onProgress(current, total, filePath);
419+
}
420+
} : undefined
421+
});
603422

604-
// Create parse context for tracking progress
605-
const ctx: ParseContext = { current, total, verbose, onProgress };
606-
607-
// Parse files by type using helper
608-
yield* parseWithParser(jsFiles, new JavaScriptParser({relativeTo: projectRoot}), 'JavaScript/TypeScript', ctx);
609-
yield* parseWithParser(packageJsonFiles, new PackageJsonParser({relativeTo: projectRoot}), 'package.json', ctx);
610-
yield* parseWithParser(jsonLockFiles, new JsonParser({relativeTo: projectRoot}), 'JSON lock', ctx);
611-
yield* parseWithParser(yamlLockFiles, new YamlParser({relativeTo: projectRoot}), 'YAML lock', ctx);
612-
yield* parseWithParser(textLockFiles, new PlainTextParser({relativeTo: projectRoot}), 'text lock', ctx);
613-
yield* parseWithParser(yamlFiles, new YamlParser({relativeTo: projectRoot}), 'YAML', ctx);
614-
yield* parseWithParser(jsonFiles, new JsonParser({relativeTo: projectRoot}), 'JSON', ctx);
423+
yield* parser.parse();
615424
}
616425

617426
/**

0 commit comments

Comments
 (0)