✏️ 文件编辑:AI 辅助代码修改

graph TB
    subgraph "File Editing Pipeline"
        Read[ReadTool] -->|cat -n format| Display[LLM Sees]
        Display -->|Strips line numbers| Edit[EditTool]

        Edit --> Validate{Validation}
        Validate -->|Pass| Apply[Apply Edit]
        Validate -->|Fail| Error[Error Result]

        Apply --> Cache[Update Cache]
        Cache --> Diff[Generate Diff]
        Diff --> Confirm[Confirmation]

        subgraph "Validation Checks"
            V1[File was read?]
            V2[File unchanged?]
            V3[String exists?]
            V4[Count matches?]
            V5[Not no-op?]
        end

        Validate --> V1
        V1 --> V2
        V2 --> V3
        V3 --> V4
        V4 --> V5
    end

文件编辑流水线架构

Claude Code 中的文件编辑不仅仅是修改文本——它是一个精心设计的流水线系统,用于处理 AI 辅助代码修改的复杂性:

class FileEditingPipeline {
  // 四阶段编辑循环
  static async executeEdit(
    tool: EditTool,
    input: EditInput,
    context: ToolContext
  ): Promise<EditResult> {
    // 阶段 1:验证
    const validation = await this.validateEdit(input, context);
    if (!validation.valid) {
      return { success: false, error: validation.error };
    }

    // 阶段 2:准备
    const prepared = await this.prepareEdit(input, validation.fileState);

    // 阶段 3:应用
    const result = await this.applyEdit(prepared);

    // 阶段 4:验证
    const verified = await this.verifyEdit(result, input);

    return verified;
  }

  // 状态追踪系统
  private static fileStates = new Map<string, FileState>();

  interface FileState {
    content: string;
    hash: string;
    mtime: number;
    encoding: BufferEncoding;
    lineEndings: '\\n' | '\\r\\n' | '\\r';
    isBinary: boolean;
    size: number;
  }
}

为什么使用多个工具而不是一个通用编辑器?

工具用途保证失败模式
EditTool单个字符串替换精确匹配计数如果出现次数 ≠ 预期则失败
MultiEditTool顺序编辑原子批处理如果任何编辑无效则失败
WriteTool完全替换完全覆写如果未先读取则失败
NotebookEditTool单元格操作保留结构如果单元格缺失则失败

每个工具都提供了通用编辑器在保持对 LLM 友好性的同时无法维护的特定保证。

行号问题:一个看似简单实则复杂的挑战

文件编辑中最关键的挑战是行号前缀问题:

// LLM 从 ReadTool 看到的内容:
const readOutput = `
1\tfunction hello() {
2\t  console.log('Hello, world!');
3\t}
`;

// LLM 可能错误尝试编辑的内容:
const wrongOldString = "2\t  console.log('Hello, world!');"; // 错误 - 包含行号

// 应该使用的内容:
const correctOldString = "  console.log('Hello, world!');"; // 正确 - 无行号

行号剥离逻辑:

class LineNumberHandler {
  // LLM 收到关于此的详细说明
  static readonly LINE_NUMBER_PATTERN = /^\d+\t/;

  static stripLineNumbers(content: string): string {
    return content
      .split("\\n")
      .map((line) => line.replace(this.LINE_NUMBER_PATTERN, ""))
      .join("\\n");
  }

  // 但真正的挑战是确保 LLM 执行此操作
  static validateOldString(
    oldString: string,
    fileContent: string,
  ): ValidationResult {
    // 检查 1:oldString 是否包含行号前缀?
    if (this.LINE_NUMBER_PATTERN.test(oldString)) {
      return {
        valid: false,
        error:
          "old_string appears to contain line number prefix. " +
          "Remove the number and tab at the start.",
        suggestion: oldString.replace(this.LINE_NUMBER_PATTERN, ""),
      };
    }

    // 检查 2:字符串是否存在于文件中?
    const occurrences = this.countOccurrences(fileContent, oldString);
    if (occurrences === 0) {
      // 尝试检测是否为行号问题
      const possibleLineNumber = oldString.match(/^(\d+)\t/);
      if (possibleLineNumber) {
        const lineNum = parseInt(possibleLineNumber[1]);
        const actualLine = this.getLine(fileContent, lineNum);
        return {
          valid: false,
          error: `String not found. Did you include line number ${lineNum}?`,
          suggestion: actualLine,
        };
      }
    }

    return { valid: true, occurrences };
  }
}

EditTool:字符串替换的外科手术精度

EditTool 实现了零歧义的精确字符串匹配:

class EditToolImplementation {
  static async executeEdit(
    input: EditInput,
    context: ToolContext,
  ): Promise<EditResult> {
    const {
      file_path,
      old_string,
      new_string,
      expected_replacements = 1,
    } = input;

    // 步骤 1:检索缓存的文件状态
    const cachedFile = context.readFileState.get(file_path);
    if (!cachedFile) {
      throw new Error(
        "File must be read with ReadFileTool before editing. " +
          "This ensures you have the current file content.",
      );
    }

    // 步骤 2:验证文件未被外部更改
    const currentStats = await fs.stat(file_path);
    if (currentStats.mtimeMs !== cachedFile.timestamp) {
      throw new Error(
        "File has been modified externally since last read. " +
          "Please read the file again to see current content.",
      );
    }

    // 步骤 3:验证编辑
    const validation = this.validateEdit(
      old_string,
      new_string,
      cachedFile.content,
      expected_replacements,
    );

    if (!validation.valid) {
      throw new Error(validation.error);
    }

    // 步骤 4:应用替换
    const newContent = this.performReplacement(
      cachedFile.content,
      old_string,
      new_string,
      expected_replacements,
    );

    // 步骤 5:生成 diff 用于验证
    const diff = this.generateDiff(cachedFile.content, newContent, file_path);

    // 步骤 6:使用相同的编码/行尾写入
    await this.writeFilePreservingFormat(file_path, newContent, cachedFile);

    // 步骤 7:更新缓存
    context.readFileState.set(file_path, {
      content: newContent,
      timestamp: Date.now(),
    });

    // 步骤 8:生成上下文片段
    const snippet = this.generateContextSnippet(
      newContent,
      new_string,
      5, // 上下文行数
    );

    return {
      success: true,
      diff,
      snippet,
      replacements: expected_replacements,
    };
  }

  private static validateEdit(
    oldString: string,
    newString: string,
    fileContent: string,
    expectedReplacements: number,
  ): EditValidation {
    // 无操作检查
    if (oldString === newString) {
      return {
        valid: false,
        error:
          "old_string and new_string are identical. No changes would be made.",
      };
    }

    // 空 old_string 特殊情况(插入)
    if (oldString === "") {
      return {
        valid: false,
        error: "Empty old_string not allowed. Use WriteTool for new files.",
      };
    }

    // 使用精确字符串匹配计数出现次数
    const occurrences = this.countExactOccurrences(fileContent, oldString);

    if (occurrences === 0) {
      return {
        valid: false,
        error:
          "old_string not found in file. Ensure exact match including whitespace.",
        suggestion: this.findSimilarStrings(fileContent, oldString),
      };
    }

    if (occurrences !== expectedReplacements) {
      return {
        valid: false,
        error:
          `Expected ${expectedReplacements} replacement(s) but found ${occurrences} occurrence(s). ` +
          `Set expected_replacements to ${occurrences} or refine old_string.`,
      };
    }

    return { valid: true };
  }

  private static countExactOccurrences(
    content: string,
    searchString: string,
  ): number {
    // 转义特殊正则字符以进行精确匹配
    const escaped = searchString.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const regex = new RegExp(escaped, "g");
    return (content.match(regex) || []).length;
  }

  private static performReplacement(
    content: string,
    oldString: string,
    newString: string,
    limit: number,
  ): string {
    // 为特殊替换模式转义字符
    const escapeReplacement = (str: string) => {
      return str
        .replace(/\$/g, "$$$$") // $ -> $$
        .replace(/\n/g, "\\n") // 保留换行符
        .replace(/\r/g, "\\r"); // 保留回车符
    };

    const escapedNew = escapeReplacement(newString);

    let result = content;
    let count = 0;
    let lastIndex = 0;

    // 手动替换以遵守限制
    while (count < limit) {
      const index = result.indexOf(oldString, lastIndex);
      if (index === -1) break;

      result =
        result.slice(0, index) +
        newString + // 使用原始字符串,非转义版本
        result.slice(index + oldString.length);

      lastIndex = index + newString.length;
      count++;
    }

    return result;
  }

  private static generateDiff(
    oldContent: string,
    newContent: string,
    filePath: string,
  ): string {
    // 使用统一 diff 格式
    const diff = createUnifiedDiff(
      filePath,
      filePath,
      oldContent,
      newContent,
      "before edit",
      "after edit",
      { context: 3 },
    );

    return diff;
  }
}

为什么 expected_replacements 很重要

// 场景:多次出现
const fileContent = `
function processUser(user) {
  console.log(user);
  return user;
}
`;

// 没有 expected_replacements:
edit({
  old_string: "user",
  new_string: "userData",
});
// 结果:所有出现都被替换(函数参数也被替换了!)

// 有 expected_replacements:
edit({
  old_string: "user",
  new_string: "userData",
  expected_replacements: 2, // 仅使用,不包括参数
});
// 结果:失败 - 强制使用更具体的 old_string

MultiEditTool:原子顺序操作

MultiEditTool 解决了多个相关编辑的复杂问题:

class MultiEditToolImplementation {
  static async executeMultiEdit(
    input: MultiEditInput,
    context: ToolContext,
  ): Promise<MultiEditResult> {
    const { file_path, edits } = input;

    // 一次加载文件
    const cachedFile = context.readFileState.get(file_path);
    if (!cachedFile) {
      throw new Error("File must be read before editing");
    }

    // 在应用任何编辑之前验证所有编辑
    const validationResult = this.validateAllEdits(edits, cachedFile.content);

    if (!validationResult.valid) {
      throw new Error(validationResult.error);
    }

    // 顺序应用编辑到工作副本
    let workingContent = cachedFile.content;
    const appliedEdits: AppliedEdit[] = [];

    for (let i = 0; i < edits.length; i++) {
      const edit = edits[i];

      try {
        // 针对当前工作内容验证此编辑
        const validation = this.validateSingleEdit(edit, workingContent, i);

        if (!validation.valid) {
          throw new Error(`Edit ${i + 1} failed: ${validation.error}`);
        }

        // 应用编辑
        const beforeEdit = workingContent;
        workingContent = this.applyEdit(workingContent, edit);

        appliedEdits.push({
          index: i,
          edit,
          diff: this.generateEditDiff(beforeEdit, workingContent),
          summary: this.summarizeEdit(edit),
        });
      } catch (error) {
        // 原子失败 - 不写入任何更改
        throw new Error(
          `MultiEdit aborted at edit ${i + 1}/${edits.length}: ${error.message}`,
        );
      }
    }

    // 所有编辑已验证并应用 - 一次写入
    await this.writeFilePreservingFormat(file_path, workingContent, cachedFile);

    // 更新缓存
    context.readFileState.set(file_path, {
      content: workingContent,
      timestamp: Date.now(),
    });

    return {
      success: true,
      editsApplied: appliedEdits,
      totalDiff: this.generateDiff(
        cachedFile.content,
        workingContent,
        file_path,
      ),
    };
  }

  private static validateAllEdits(
    edits: Edit[],
    originalContent: string,
  ): ValidationResult {
    // 检查空编辑数组
    if (edits.length === 0) {
      return {
        valid: false,
        error: "No edits provided",
      };
    }

    // 检测潜在冲突
    const conflicts = this.detectEditConflicts(edits, originalContent);
    if (conflicts.length > 0) {
      return {
        valid: false,
        error:
          "Edit conflicts detected:\\n" +
          conflicts.map((c) => c.description).join("\\n"),
      };
    }

    // 模拟所有编辑以确保它们有效
    let simulatedContent = originalContent;
    for (let i = 0; i < edits.length; i++) {
      const edit = edits[i];
      const occurrences = this.countOccurrences(
        simulatedContent,
        edit.old_string,
      );

      if (occurrences === 0) {
        return {
          valid: false,
          error:
            `Edit ${i + 1}: old_string not found. ` +
            `Previous edits may have removed it.`,
        };
      }

      if (occurrences !== (edit.expected_replacements || 1)) {
        return {
          valid: false,
          error:
            `Edit ${i + 1}: Expected ${edit.expected_replacements || 1} ` +
            `replacements but found ${occurrences}`,
        };
      }

      // 应用到模拟
      simulatedContent = this.applyEdit(simulatedContent, edit);
    }

    return { valid: true };
  }

  private static detectEditConflicts(
    edits: Edit[],
    content: string,
  ): EditConflict[] {
    const conflicts: EditConflict[] = [];

    for (let i = 0; i < edits.length - 1; i++) {
      for (let j = i + 1; j < edits.length; j++) {
        const edit1 = edits[i];
        const edit2 = edits[j];

        // 冲突类型 1:后续编辑修改前一个编辑的结果
        if (edit2.old_string.includes(edit1.new_string)) {
          conflicts.push({
            type: "dependency",
            edits: [i, j],
            description: `Edit ${j + 1} depends on result of edit ${i + 1}`,
          });
        }

        // 冲突类型 2:重叠替换
        if (this.editsOverlap(edit1, edit2, content)) {
          conflicts.push({
            type: "overlap",
            edits: [i, j],
            description: `Edits ${i + 1} and ${j + 1} affect overlapping text`,
          });
        }

        // 冲突类型 3:相同目标,不同替换
        if (
          edit1.old_string === edit2.old_string &&
          edit1.new_string !== edit2.new_string
        ) {
          conflicts.push({
            type: "contradiction",
            edits: [i, j],
            description: `Edits ${i + 1} and ${j + 1} replace same text differently`,
          });
        }
      }
    }

    return conflicts;
  }

  private static editsOverlap(
    edit1: Edit,
    edit2: Edit,
    content: string,
  ): boolean {
    // 查找所有出现的位置
    const positions1 = this.findAllPositions(content, edit1.old_string);
    const positions2 = this.findAllPositions(content, edit2.old_string);

    // 检查是否有任何位置重叠
    for (const pos1 of positions1) {
      const end1 = pos1 + edit1.old_string.length;

      for (const pos2 of positions2) {
        const end2 = pos2 + edit2.old_string.length;

        // 检查重叠
        if (pos1 < end2 && pos2 < end1) {
          return true;
        }
      }
    }

    return false;
  }
}

冲突检测实战

// 示例:依赖编辑
const edits = [
  {
    old_string: "console.log",
    new_string: "logger.info",
  },
  {
    old_string: "logger.info('test')", // 依赖于第一个编辑!
    new_string: "logger.debug('test')",
  },
];
// 结果:检测到冲突 - 编辑 2 依赖于编辑 1

// 示例:安全的顺序编辑
const safeEdits = [
  {
    old_string: "var x",
    new_string: "let x",
  },
  {
    old_string: "var y",
    new_string: "let y",
  },
];
// 结果:无冲突 - 独立更改

WriteTool:完整文件操作

WriteTool 处理完整的文件创建或替换:

class WriteToolImplementation {
  static async executeWrite(
    input: WriteInput,
    context: ToolContext,
  ): Promise<WriteResult> {
    const { file_path, content } = input;

    // 检查文件是否存在
    const exists = await fs
      .access(file_path)
      .then(() => true)
      .catch(() => false);

    if (exists) {
      // 现有文件 - 必须已读取
      const cachedFile = context.readFileState.get(file_path);
      if (!cachedFile) {
        throw new Error(
          "Existing file must be read with ReadFileTool before overwriting. " +
            "This prevents accidental data loss.",
        );
      }

      // 验证未被外部修改
      const stats = await fs.stat(file_path);
      if (stats.mtimeMs !== cachedFile.timestamp) {
        throw new Error(
          "File has been modified externally. " +
            "Read the file again to see current content before overwriting.",
        );
      }
    }

    // 文档文件限制
    if (this.isDocumentationFile(file_path) && !context.explicitlyAllowed) {
      throw new Error(
        "Creating documentation files (*.md, README) requires explicit user request. " +
          "Focus on code implementation unless specifically asked for docs.",
      );
    }

    // 准备写入操作
    const writeData = await this.prepareWriteData(
      content,
      exists ? context.readFileState.get(file_path) : null,
    );

    // 确保目录存在
    const dir = path.dirname(file_path);
    await fs.mkdir(dir, { recursive: true });

    // 写入文件
    await fs.writeFile(file_path, writeData.content, {
      encoding: writeData.encoding,
      mode: writeData.mode,
    });

    // 更新缓存
    context.readFileState.set(file_path, {
      content: content,
      timestamp: Date.now(),
    });

    // 生成结果
    if (exists) {
      const snippet = this.generateContextSnippet(content, null, 10);
      return {
        success: true,
        action: "updated",
        snippet,
      };
    } else {
      return {
        success: true,
        action: "created",
        path: file_path,
      };
    }
  }

  private static async prepareWriteData(
    content: string,
    existingFile: FileState | null,
  ): Promise<WriteData> {
    // 检测或保留行尾
    let lineEnding = "\\n"; // 默认为 LF

    if (existingFile) {
      // 保留现有行尾
      lineEnding = existingFile.lineEndings;
    } else if (process.platform === "win32") {
      // Windows 上新文件默认为 CRLF
      lineEnding = "\\r\\n";
    }

    // 规范化然后应用正确的行尾
    const normalizedContent = content.replace(/\r\n|\r|\n/g, "\\n");
    const finalContent = normalizedContent.replace(/\n/g, lineEnding);

    // 检测编码(简化 - 实际实现更复杂)
    const encoding = existingFile?.encoding || "utf8";

    // 更新时保留文件模式
    const mode = existingFile ? (await fs.stat(existingFile.path)).mode : 0o644;

    return {
      content: finalContent,
      encoding,
      mode,
    };
  }
}

验证层:深度防御

每个编辑操作都经过多个验证层:

class FileValidationPipeline {
  static async validateFileOperation(
    operation: FileOperation,
    context: ToolContext,
  ): Promise<ValidationResult> {
    // 层 1:路径验证
    const pathValidation = await this.validatePath(operation.path, context);
    if (!pathValidation.valid) return pathValidation;

    // 层 2:权限检查
    const permissionCheck = await this.checkPermissions(operation, context);
    if (!permissionCheck.valid) return permissionCheck;

    // 层 3:文件状态验证
    const stateValidation = await this.validateFileState(operation, context);
    if (!stateValidation.valid) return stateValidation;

    // 层 4:内容验证
    const contentValidation = await this.validateContent(operation);
    if (!contentValidation.valid) return contentValidation;

    // 层 5:安全检查
    const safetyCheck = await this.performSafetyChecks(operation, context);
    if (!safetyCheck.valid) return safetyCheck;

    return { valid: true };
  }

  private static async validatePath(
    filePath: string,
    context: ToolContext,
  ): Promise<ValidationResult> {
    // 绝对路径要求
    if (!path.isAbsolute(filePath)) {
      return {
        valid: false,
        error: "File path must be absolute",
        suggestion: path.resolve(filePath),
      };
    }

    // 路径遍历防护
    const resolved = path.resolve(filePath);
    const normalized = path.normalize(filePath);

    if (resolved !== normalized) {
      return {
        valid: false,
        error: "Path contains suspicious traversal patterns",
      };
    }

    // 边界检查
    const projectRoot = context.projectRoot;
    const allowed = [projectRoot, ...context.additionalWorkingDirectories];

    const isAllowed = allowed.some((dir) =>
      resolved.startsWith(path.resolve(dir)),
    );

    if (!isAllowed) {
      return {
        valid: false,
        error: "Path is outside allowed directories",
        allowedDirs: allowed,
      };
    }

    // 特殊文件防护
    const forbidden = [
      /\.git\//,
      /node_modules\//,
      /\.env$/,
      /\.ssh\//,
      /\.gnupg\//,
    ];

    if (forbidden.some((pattern) => pattern.test(resolved))) {
      return {
        valid: false,
        error: "Operation on sensitive files not allowed",
      };
    }

    return { valid: true };
  }

  private static async validateFileState(
    operation: FileOperation,
    context: ToolContext,
  ): Promise<ValidationResult> {
    if (operation.type === "create") {
      // 检查文件是否已存在
      const exists = await fs
        .access(operation.path)
        .then(() => true)
        .catch(() => false);

      if (exists && !operation.overwrite) {
        return {
          valid: false,
          error:
            "File already exists. Use WriteTool with prior read to overwrite.",
        };
      }
    }

    if (operation.type === "edit" || operation.type === "overwrite") {
      const cached = context.readFileState.get(operation.path);

      if (!cached) {
        return {
          valid: false,
          error: "File must be read before editing",
        };
      }

      // 陈旧性检查
      try {
        const stats = await fs.stat(operation.path);
        if (stats.mtimeMs !== cached.timestamp) {
          const timeDiff = stats.mtimeMs - cached.timestamp;
          return {
            valid: false,
            error: "File has been modified externally",
            details: {
              cachedTime: new Date(cached.timestamp),
              currentTime: new Date(stats.mtimeMs),
              difference: `${Math.abs(timeDiff)}ms`,
            },
          };
        }
      } catch (error) {
        return {
          valid: false,
          error: "File no longer exists or is inaccessible",
        };
      }
    }

    return { valid: true };
  }
}

Diff 生成 & 反馈:闭环

每次编辑都会为 LLM 生成丰富的反馈:

class DiffGenerator {
  static generateEditFeedback(
    operation: EditOperation,
    result: EditResult,
  ): EditFeedback {
    const feedback: EditFeedback = {
      summary: this.generateSummary(operation, result),
      diff: this.generateDiff(operation, result),
      snippet: this.generateContextSnippet(operation, result),
      statistics: this.generateStatistics(operation, result),
    };

    return feedback;
  }

  private static generateDiff(
    operation: EditOperation,
    result: EditResult,
  ): string {
    const { oldContent, newContent, filePath } = result;

    // 根据更改大小使用不同的 diff 策略
    const changeRatio = this.calculateChangeRatio(oldContent, newContent);

    if (changeRatio < 0.1) {
      // 小更改 - 使用统一 diff
      return this.generateUnifiedDiff(oldContent, newContent, filePath, {
        context: 5,
      });
    } else if (changeRatio < 0.5) {
      // 中等更改 - 使用单词 diff
      return this.generateWordDiff(oldContent, newContent, filePath);
    } else {
      // 大更改 - 使用摘要 diff
      return this.generateSummaryDiff(oldContent, newContent, filePath);
    }
  }

  private static generateContextSnippet(
    operation: EditOperation,
    result: EditResult,
  ): string {
    const { newContent, changedRanges } = result;
    const lines = newContent.split("\\n");
    const snippets: string[] = [];

    for (const range of changedRanges) {
      const start = Math.max(0, range.start - 5);
      const end = Math.min(lines.length, range.end + 5);

      const snippet = lines
        .slice(start, end)
        .map((line, idx) => {
          const lineNum = start + idx + 1;
          const isChanged = lineNum >= range.start && lineNum <= range.end;
          const prefix = isChanged ? ">" : " ";
          return `${prefix} ${lineNum}\\t${line}`;
        })
        .join("\\n");

      snippets.push(snippet);
    }

    // 限制总片段大小
    const combined = snippets.join("\\n...\\n");
    if (combined.length > 1000) {
      return combined.substring(0, 1000) + "\\n... (truncated)";
    }

    return combined;
  }

  private static generateUnifiedDiff(
    oldContent: string,
    newContent: string,
    filePath: string,
    options: DiffOptions,
  ): string {
    const oldLines = oldContent.split("\\n");
    const newLines = newContent.split("\\n");

    // 使用 Myers diff 算法
    const diff = new MyersDiff(oldLines, newLines);
    const hunks = diff.getHunks(options.context);

    // 格式化为统一 diff
    const header = [
      `--- ${filePath}\\t(before edit)`,
      `+++ ${filePath}\\t(after edit)`,
      "",
    ].join("\\n");

    const formattedHunks = hunks
      .map((hunk) => {
        const range =
          `@@ -${hunk.oldStart},${hunk.oldLength} ` +
          `+${hunk.newStart},${hunk.newLength} @@`;

        const lines = hunk.lines.map((line) => {
          switch (line.type) {
            case "unchanged":
              return ` ${line.content}`;
            case "deleted":
              return `-${line.content}`;
            case "added":
              return `+${line.content}`;
          }
        });

        return [range, ...lines].join("\\n");
      })
      .join("\\n");

    return header + formattedHunks;
  }
}

特殊情况 & 边界条件

文件编辑必须处理众多边界情况:

class EdgeCaseHandlers {
  // 空文件处理
  static handleEmptyFile(
    operation: EditOperation,
    content: string,
  ): HandlerResult {
    if (content.trim() === "") {
      if (operation.type === "edit") {
        return {
          error: "Cannot edit empty file. Use WriteTool to add content.",
        };
      }

      // ReadTool 的特殊反馈
      return {
        warning:
          "<system-reminder>Warning: the file exists but the contents are empty.</system-reminder>",
      };
    }

    return { ok: true };
  }

  // 二进制文件检测
  static async detectBinaryFile(
    filePath: string,
    content: Buffer,
  ): Promise<boolean> {
    // 检查空字节(二进制文件中常见)
    for (let i = 0; i < Math.min(content.length, 8192); i++) {
      if (content[i] === 0) {
        return true;
      }
    }

    // 检查文件扩展名
    const binaryExtensions = [
      ".jpg",
      ".png",
      ".gif",
      ".pdf",
      ".zip",
      ".exe",
      ".dll",
      ".so",
      ".dylib",
    ];

    const ext = path.extname(filePath).toLowerCase();
    if (binaryExtensions.includes(ext)) {
      return true;
    }

    // 使用文件魔数
    const magicNumbers = {
      png: [0x89, 0x50, 0x4e, 0x47],
      jpg: [0xff, 0xd8, 0xff],
      pdf: [0x25, 0x50, 0x44, 0x46],
      zip: [0x50, 0x4b, 0x03, 0x04],
    };

    for (const [type, magic] of Object.entries(magicNumbers)) {
      if (this.bufferStartsWith(content, magic)) {
        return true;
      }
    }

    return false;
  }

  // 符号链接处理
  static async handleSymlink(
    filePath: string,
    operation: FileOperation,
  ): Promise<SymlinkResult> {
    try {
      const stats = await fs.lstat(filePath);

      if (!stats.isSymbolicLink()) {
        return { isSymlink: false };
      }

      const target = await fs.readlink(filePath);
      const resolvedTarget = path.resolve(path.dirname(filePath), target);

      // 检查目标是否存在
      const targetExists = await fs
        .access(resolvedTarget)
        .then(() => true)
        .catch(() => false);

      if (!targetExists && operation.type === "read") {
        return {
          isSymlink: true,
          error: `Broken symlink: points to non-existent ${target}`,
        };
      }

      // 对于编辑操作,提供选择
      if (operation.type === "edit") {
        return {
          isSymlink: true,
          warning: `This is a symlink to ${target}. Edit will modify the target file.`,
          target: resolvedTarget,
        };
      }

      return {
        isSymlink: true,
        target: resolvedTarget,
      };
    } catch (error) {
      return { isSymlink: false };
    }
  }

  // 编码检测和处理
  static async detectEncoding(
    filePath: string,
    buffer: Buffer,
  ): Promise<EncodingInfo> {
    // 检查 BOM
    if (buffer[0] === 0xef && buffer[1] === 0xbb && buffer[2] === 0xbf) {
      return { encoding: "utf8", hasBOM: true };
    }

    if (buffer[0] === 0xff && buffer[1] === 0xfe) {
      return { encoding: "utf16le", hasBOM: true };
    }

    if (buffer[0] === 0xfe && buffer[1] === 0xff) {
      return { encoding: "utf16be", hasBOM: true };
    }

    // 尝试 UTF-8
    try {
      const decoded = buffer.toString("utf8");
      // 检查替换字符
      if (!decoded.includes("\\ufffd")) {
        return { encoding: "utf8", hasBOM: false };
      }
    } catch {}

    // 回退启发式
    const nullBytes = buffer.filter((b) => b === 0).length;
    const highBytes = buffer.filter((b) => b > 127).length;

    if (nullBytes > buffer.length * 0.1) {
      return { encoding: "binary", hasBOM: false };
    }

    if (highBytes < buffer.length * 0.1) {
      return { encoding: "ascii", hasBOM: false };
    }

    // 默认为 utf8 并警告
    return {
      encoding: "utf8",
      hasBOM: false,
      warning: "Encoding uncertain, assuming UTF-8",
    };
  }
}

性能优化

大规模文件编辑需要仔细优化:

class FileEditingPerformance {
  // 大文件的缓存策略
  private static chunkCache = new Map<string, ChunkedFile>();

  static async readLargeFile(
    filePath: string,
    options: ReadOptions,
  ): Promise<FileContent> {
    const stats = await fs.stat(filePath);

    // 对超过 10MB 的文件使用流式处理
    if (stats.size > 10 * 1024 * 1024) {
      return this.streamRead(filePath, options);
    }

    // 对 1-10MB 的文件使用分块缓存
    if (stats.size > 1024 * 1024) {
      return this.chunkedRead(filePath, options);
    }

    // 小文件直接读取
    return this.directRead(filePath, options);
  }

  private static async chunkedRead(
    filePath: string,
    options: ReadOptions,
  ): Promise<FileContent> {
    const cached = this.chunkCache.get(filePath);

    if (cached && cached.mtime === (await fs.stat(filePath)).mtimeMs) {
      // 使用缓存的分块
      return this.assembleFromChunks(cached, options);
    }

    // 分块读取
    const chunkSize = 256 * 1024; // 256KB 分块
    const chunks: Buffer[] = [];
    const stream = createReadStream(filePath, {
      highWaterMark: chunkSize,
    });

    for await (const chunk of stream) {
      chunks.push(chunk);
    }

    // 缓存以供将来使用
    this.chunkCache.set(filePath, {
      chunks,
      mtime: (await fs.stat(filePath)).mtimeMs,
      encoding: "utf8",
    });

    return this.assembleFromChunks({ chunks }, options);
  }

  // 批量编辑准备
  static prepareBatchEdits(edits: Edit[], content: string): PreparedBatch {
    // 预先计算所有位置
    const positions = new Map<string, number[]>();

    for (const edit of edits) {
      if (!positions.has(edit.old_string)) {
        positions.set(
          edit.old_string,
          this.findAllPositions(content, edit.old_string),
        );
      }
    }

    // 按位置排序编辑(反向顺序以安全应用)
    const sortedEdits = edits
      .map((edit) => ({
        edit,
        position: positions.get(edit.old_string)![0],
      }))
      .sort((a, b) => b.position - a.position);

    return {
      edits: sortedEdits,
      positions,
      canApplyInReverse: true,
    };
  }

  // 内存高效的 diff 生成
  static *generateStreamingDiff(
    oldContent: string,
    newContent: string,
  ): Generator<DiffChunk> {
    const oldLines = oldContent.split("\\n");
    const newLines = newContent.split("\\n");

    // 对大文件使用滑动窗口
    const windowSize = 1000;
    let oldIndex = 0;
    let newIndex = 0;

    while (oldIndex < oldLines.length || newIndex < newLines.length) {
      const oldWindow = oldLines.slice(oldIndex, oldIndex + windowSize);
      const newWindow = newLines.slice(newIndex, newIndex + windowSize);

      const diff = this.computeWindowDiff(
        oldWindow,
        newWindow,
        oldIndex,
        newIndex,
      );

      yield diff;

      oldIndex += diff.oldConsumed;
      newIndex += diff.newConsumed;
    }
  }
}

性能特性

文件大小操作方法时间内存
<100KB读取直接<5msO(n)
100KB-1MB读取直接5-20msO(n)
1-10MB读取分块20-100msO(chunk)
>10MB读取流式100ms+O(1)
任意编辑(单个)内存<10msO(n)
任意编辑(多个)顺序<50msO(n)
任意写入直接<20msO(n)

常见失败模式 & 恢复

理解常见失败有助于构建健壮的编辑:

class FailureRecovery {
  // 外部修改冲突
  static async handleExternalModification(
    filePath: string,
    cachedState: FileState,
    operation: EditOperation,
  ): Promise<RecoveryStrategy> {
    const currentContent = await fs.readFile(filePath, "utf8");
    const currentStats = await fs.stat(filePath);

    // 尝试三方合并
    const mergeResult = await this.attemptThreeWayMerge(
      cachedState.content, // 基础
      operation.newContent, // 我们的
      currentContent, // 他们的
    );

    if (mergeResult.success && !mergeResult.conflicts) {
      return {
        strategy: "auto_merge",
        content: mergeResult.merged,
        warning: "File was modified externally. Changes have been merged.",
      };
    }

    // 生成冲突标记
    if (mergeResult.conflicts) {
      return {
        strategy: "conflict_markers",
        content: mergeResult.conflictMarked,
        error: "Merge conflicts detected. Manual resolution required.",
        conflicts: mergeResult.conflicts,
      };
    }

    // 回退:显示 diff 并询问
    return {
      strategy: "user_decision",
      error: "File modified externally",
      options: ["Overwrite external changes", "Abort edit", "Read file again"],
      diff: this.generateDiff(cachedState.content, currentContent),
    };
  }

  // 编码问题
  static async handleEncodingError(
    filePath: string,
    error: Error,
    content: string,
  ): Promise<RecoveryStrategy> {
    // 尝试不同的编码
    const encodings = ["utf8", "latin1", "utf16le"];

    for (const encoding of encodings) {
      try {
        const buffer = Buffer.from(content, encoding as any);
        await fs.writeFile(filePath + ".test", buffer);
        await fs.unlink(filePath + ".test");

        return {
          strategy: "alternate_encoding",
          encoding,
          warning: `Using ${encoding} encoding instead of UTF-8`,
        };
      } catch {}
    }

    // 二进制回退
    return {
      strategy: "binary_write",
      warning: "Treating as binary file",
      content: Buffer.from(content, "binary"),
    };
  }

  // 磁盘空间问题
  static async handleDiskSpaceError(
    filePath: string,
    requiredBytes: number,
  ): Promise<RecoveryStrategy> {
    const diskInfo = await this.getDiskInfo(path.dirname(filePath));

    if (diskInfo.available < requiredBytes) {
      // 计算可以释放的空间
      const suggestions = await this.analyzeDiskUsage(path.dirname(filePath));

      return {
        strategy: "free_space",
        error:
          `Insufficient disk space. Need ${this.formatBytes(requiredBytes)}, ` +
          `have ${this.formatBytes(diskInfo.available)}`,
        suggestions: suggestions.map((s) => ({
          path: s.path,
          size: this.formatBytes(s.size),
          type: s.type,
        })),
      };
    }

    // 可能是配额问题
    return {
      strategy: "quota_check",
      error: "Write failed despite apparent free space. Check disk quotas.",
      command: `quota -v ${process.env.USER}`,
    };
  }

  // 部分写入恢复
  static async recoverPartialWrite(
    filePath: string,
    expectedSize: number,
  ): Promise<RecoveryResult> {
    try {
      const stats = await fs.stat(filePath);

      if (stats.size === 0) {
        // 完全失败 - 检查备份
        const backupPath = filePath + ".backup";
        if (
          await fs
            .access(backupPath)
            .then(() => true)
            .catch(() => false)
        ) {
          await fs.rename(backupPath, filePath);
          return {
            recovered: true,
            method: "backup_restore",
          };
        }
      }

      if (stats.size < expectedSize) {
        // 部分写入 - 检查临时文件
        const tempPath = filePath + ".tmp";
        if (
          await fs
            .access(tempPath)
            .then(() => true)
            .catch(() => false)
        ) {
          const tempStats = await fs.stat(tempPath);
          if (tempStats.size === expectedSize) {
            await fs.rename(tempPath, filePath);
            return {
              recovered: true,
              method: "temp_file_restore",
            };
          }
        }
      }

      return {
        recovered: false,
        partialSize: stats.size,
        expectedSize,
      };
    } catch (error) {
      return {
        recovered: false,
        error: error.message,
      };
    }
  }
}