Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion apps/cli/ai/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ export function startAiAgent( config: AiAgentConfig ): Query {
: createStudioTools( { enablePreviewSteering: isForkedByDesktop } ),
};

// The remote-session controller sets STUDIO_REMOTE_SESSION=1 when it spawns
// `studio code --json` so the agent knows it's driving Telegram and should
// favor screenshot replies.
const remoteSession = resolvedEnv.STUDIO_REMOTE_SESSION === '1';

// Build site-aware system prompt
const systemPromptOptions = isRemoteSite
? {
Expand All @@ -89,8 +94,9 @@ export function startAiAgent( config: AiAgentConfig ): Query {
url: activeSite.url ?? '',
id: activeSite.wpcomSiteId!,
},
remoteSession,
}
: { previewSteering: isForkedByDesktop };
: { previewSteering: isForkedByDesktop, remoteSession };

if ( ! fs.existsSync( STUDIO_SITES_ROOT ) ) {
fs.mkdirSync( STUDIO_SITES_ROOT, { recursive: true } );
Expand Down
24 changes: 22 additions & 2 deletions apps/cli/ai/system-prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,29 @@ export interface BuildSystemPromptOptions {
// tools to the agent. When false, the "Keep the preview in sync" section
// is omitted so we don't document tools the agent can't actually call.
previewSteering?: boolean;
// True when the agent is being driven by the Telegram remote-session bridge.
// Adds guidance about delivering screenshots via `share_screenshot` and
// offering a preview-site follow-up.
remoteSession?: boolean;
}

/**
 * Builds the system prompt for the AI agent.
 *
 * When `options.remoteSite` is set, the prompt is made of the remote intro
 * followed by the remote content and design guidelines. Otherwise it is made
 * of the local intro (with `previewSteering` defaulting to `false`) followed
 * by the local content and design guidelines.
 *
 * When `options.remoteSession` is truthy, the remote-session guidance is
 * appended after the design guidelines, separated from them by a blank line.
 *
 * @param options Optional prompt configuration; omitted fields fall back to
 *                the local, non-remote-session prompt.
 * @returns The assembled system prompt, terminated by a newline.
 */
export function buildSystemPrompt( options?: BuildSystemPromptOptions ): string {
	// Empty string when not in a remote session, so it can be interpolated
	// unconditionally at the end of either prompt variant.
	const remoteSessionAddendum = options?.remoteSession ? `\n\n${ REMOTE_SESSION_GUIDANCE }` : '';

	if ( options?.remoteSite ) {
		return `${ buildRemoteIntro( options.remoteSite ) }

${ REMOTE_CONTENT_GUIDELINES }

${ REMOTE_DESIGN_GUIDELINES }${ remoteSessionAddendum }
`;
	}

	return `${ buildLocalIntro( { previewSteering: options?.previewSteering ?? false } ) }

${ LOCAL_CONTENT_GUIDELINES }

${ LOCAL_DESIGN_GUIDELINES }${ remoteSessionAddendum }
`;
}

Expand Down Expand Up @@ -194,6 +200,20 @@ One \`Write\` or \`Edit\` per turn (read-only \`site_info\`, \`site_list\`, \`wp
- All animations and transitions must respect \`prefers-reduced-motion\`. Add a \`@media (prefers-reduced-motion: reduce)\` block that disables or simplifies animations (e.g. \`animation: none; transition: none; scroll-behavior: auto;\`).`;
}

// Extra system-prompt section for agents driven through the Telegram
// remote-session bridge. `buildSystemPrompt` appends it (after a blank line)
// only when `options.remoteSession` is truthy. The text is sent to the model
// verbatim, so wording changes here change agent behavior.
const REMOTE_SESSION_GUIDANCE = `## Telegram remote session

You are running over Telegram. The user iterates turn-by-turn; keep replies short and image-driven.

After ANY visible change to a site, call \`share_screenshot\` before ending the turn — no preamble, no permission-asking. It is fire-and-forget: the image goes to the user but is NOT returned to you. Do not analyze or describe what you sent. Follow up with at most one short sentence (e.g. "Heading is now red." or "Want me to publish this as a preview?").

Defaults to a 16:9 above-the-fold view. Pass \`fullPage: true\` only when the user explicitly asks for the whole page. Captions describe what the user is looking at; never mention "full page", "viewport", or other capture-mode wording.

\`take_screenshot\` is separate — use it only when YOU need to inspect a render before continuing. Don't pair it with \`share_screenshot\` for the same URL.

For non-visual changes (data, logs, listings), reply with a concise text summary; no screenshot needed.

Never claim to have stored, saved, or remembered anything beyond what your tools actually did. There is no gist storage, no preview-link memory, no session summary. Do not invent epilogues like "gist stored" or "preview link saved".`;

const REMOTE_CONTENT_GUIDELINES = `## Block content guidelines

- Use only core WordPress blocks. No custom HTML blocks except for inline SVGs.
Expand Down
223 changes: 154 additions & 69 deletions apps/cli/ai/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -665,15 +665,98 @@ const validateBlocksTool = tool(

// --- Screenshot tool ---

// Viewport presets for `take_screenshot`, keyed by the tool's `viewport`
// argument ("desktop" or "mobile"). Both presets are portrait (taller than
// wide); `take_screenshot` always captures with `fullPage: true`, so the agent
// gets the whole scrolled page to inspect at once.
const VIEWPORTS = {
desktop: { width: 1040, height: 1248 },
mobile: { width: 390, height: 844 },
} as const;

// Viewport presets for `share_screenshot`, which by default captures only the
// viewport — an above-the-fold view "as it would look on a screen". The
// desktop preset is 16:9 (1280x720); the mobile preset is a portrait 390x844
// window, not 16:9. The user can ask for the full page explicitly, in which
// case the tool is called with `fullPage: true`.
const SHARE_VIEWPORTS = {
desktop: { width: 1280, height: 720 },
mobile: { width: 390, height: 844 },
} as const;

/**
 * Loads `url` in a fresh page of the shared browser and returns a PNG
 * screenshot of it as a base64 string.
 *
 * @param url      Address to navigate to.
 * @param viewport Window size the page is opened with.
 * @param options  `fullPage: true` captures the whole scrolled document;
 *                 `false` captures only the viewport.
 * @returns Base64-encoded PNG data.
 *
 * The page is always closed before returning, including when navigation or
 * capture throws.
 */
async function captureScreenshotPng(
	url: string,
	viewport: { width: number; height: number },
	options: { fullPage: boolean }
): Promise< string > {
	const sharedBrowser = await getSharedBrowser();
	const tab = await sharedBrowser.newPage( { viewport } );

	try {
		// Ask the page for reduced motion before it loads.
		await tab.emulateMedia( { reducedMotion: 'reduce' } );
		await tab.goto( url, { waitUntil: 'domcontentloaded', timeout: 30000 } );

		// Best effort: a page that never reaches network idle within 10s is
		// still captured.
		try {
			await tab.waitForLoadState( 'networkidle', { timeout: 10000 } );
		} catch {
			// Deliberately ignored.
		}

		// Runs inside the page. Full-page mode walks the document one viewport
		// at a time so lazy-loaded images start loading, then returns to the
		// top. Viewport mode leaves the scroll position alone and only waits
		// for images overlapping the first viewport. Either way the wait for
		// images is capped at five seconds.
		await tab.evaluate( async ( wholePage ) => {
			const sleep = ( ms: number ) =>
				new Promise< void >( ( done ) => setTimeout( done, ms ) );
			const overlapsViewport = ( box: { top: number; bottom: number } ) =>
				box.bottom > 0 && box.top < window.innerHeight;

			if ( wholePage ) {
				const totalHeight = document.body.scrollHeight;
				const step = window.innerHeight;
				let offset = 0;
				while ( offset < totalHeight ) {
					window.scrollTo( 0, offset );
					await sleep( 100 );
					offset += step;
				}
				window.scrollTo( 0, 0 );
			}

			const unsettled = Array.from( document.images ).filter(
				( img ) =>
					! img.complete && ( wholePage || overlapsViewport( img.getBoundingClientRect() ) )
			);

			const giveUp = sleep( 5000 );
			const settled = Promise.all(
				unsettled.map(
					( img ) =>
						new Promise< void >( ( done ) => {
							// A failed image counts as settled too.
							const finish = () => done();
							img.addEventListener( 'load', finish, { once: true } );
							img.addEventListener( 'error', finish, { once: true } );
						} )
				)
			);
			await Promise.race( [ settled, giveUp ] );
		}, options.fullPage );

		// Hide the WordPress admin bar and scrollbars so they don't appear in
		// the capture.
		await tab.addStyleTag( {
			content: `
#wpadminbar { display: none !important; }
html { margin-top: 0 !important; }
::-webkit-scrollbar { display: none !important; }
html, body { scrollbar-width: none !important; }
`,
		} );

		const png = await tab.screenshot( { fullPage: options.fullPage, type: 'png' } );
		return png.toString( 'base64' );
	} finally {
		await tab.close();
	}
}

const takeScreenshotTool = tool(
'take_screenshot',
'Takes a full-page screenshot of a URL. Returns the screenshot as an image that you can analyze visually. ' +
'Supports desktop and mobile viewports. Use this to verify the site looks correct after building it.',
'Supports desktop and mobile viewports. Use this to verify the site looks correct after building it. ' +
'Note: this image is for your own visual reasoning only — the user does not see it. ' +
'Use `share_screenshot` instead when you want to deliver the rendered page to the user.',
{
url: z.string().describe( 'The URL to screenshot' ),
viewport: z
Expand All @@ -686,79 +769,80 @@ const takeScreenshotTool = tool(
async ( args ) => {
try {
const viewportType = args.viewport ?? 'desktop';
const viewport = VIEWPORTS[ viewportType ];

emitProgress( `Taking ${ viewportType } screenshot of ${ args.url }…` );
const base64 = await captureScreenshotPng( args.url, VIEWPORTS[ viewportType ], {
fullPage: true,
} );
emitProgress( `Screenshot captured (${ viewportType })` );
return {
content: [
{
type: 'image' as const,
data: base64,
mimeType: 'image/png',
},
],
};
} catch ( error ) {
return errorResult(
`Screenshot failed: ${ error instanceof Error ? error.message : String( error ) }`
);
}
}
);

const browser = await getSharedBrowser();
const page = await browser.newPage( { viewport } );

try {
// Reduce motion to avoid capturing mid-animation states
await page.emulateMedia( { reducedMotion: 'reduce' } );

await page.goto( args.url, { waitUntil: 'domcontentloaded', timeout: 30000 } );
await page.waitForLoadState( 'networkidle', { timeout: 10000 } ).catch( () => {} );

// Scroll through the page to trigger lazy-loaded images, then wait
// for all images to finish loading (with a timeout so we don't hang
// on images that never settle).
await page.evaluate( async () => {
const delay = ( ms: number ) =>
new Promise< void >( ( resolve ) => setTimeout( resolve, ms ) );
const scrollHeight = document.body.scrollHeight;
const viewportHeight = window.innerHeight;
for ( let y = 0; y < scrollHeight; y += viewportHeight ) {
window.scrollTo( 0, y );
await delay( 100 );
}
window.scrollTo( 0, 0 );

const timeout = new Promise< void >( ( resolve ) => setTimeout( resolve, 5000 ) );
const allImages = Promise.all(
Array.from( document.images )
.filter( ( img ) => ! img.complete )
.map(
( img ) =>
new Promise< void >( ( resolve ) => {
img.addEventListener( 'load', () => resolve() );
img.addEventListener( 'error', () => resolve() );
} )
)
);
await Promise.race( [ allImages, timeout ] );
} );

// Hide WordPress admin bar and scrollbars for cleaner screenshots
await page.addStyleTag( {
content: `
#wpadminbar { display: none !important; }
html { margin-top: 0 !important; }
::-webkit-scrollbar { display: none !important; }
html, body { scrollbar-width: none !important; }
`,
} );

const buffer = await page.screenshot( { fullPage: true, type: 'png' } );
const base64 = buffer.toString( 'base64' );
const shareScreenshotTool = tool(
'share_screenshot',
'Fire-and-forget primitive that captures a URL and delivers the image to the user. ' +
'Call after ANY visible change to a site so the user sees the new state. ' +
'Returns a confirmation string only — the image is NOT returned to you. The user already has the picture; do not analyze or describe what was sent in your reply. After calling this, write at most one short follow-up sentence and end the turn. ' +
'Defaults to a 16:9 above-the-fold view. Set `fullPage: true` only when the user explicitly asks for the whole scroll length. ' +
'Distinct from `take_screenshot`, which is for your own visual reasoning before continuing work.',
{
url: z.string().describe( 'The URL to screenshot and send to the user' ),
viewport: z
.enum( [ 'desktop', 'mobile' ] )
.optional()
.describe(
'Viewport size: "desktop" (1280x720, 16:9) or "mobile" (390x844). Defaults to desktop.'
),
fullPage: z
.boolean()
.optional()
.describe(
'When true, capture the entire scrolled page instead of just the viewport. Defaults to false; only set this when the user has explicitly asked for the full page.'
),
caption: z
.string()
.optional()
.describe(
'Short caption sent with the image. Describe what the user is looking at; do NOT mention "full page", "viewport", or other capture-mode wording. Keep it under ~1024 characters.'
),
},
async ( args ) => {
try {
const viewportType = args.viewport ?? 'desktop';
const base64 = await captureScreenshotPng( args.url, SHARE_VIEWPORTS[ viewportType ], {
fullPage: args.fullPage ?? false,
} );

emitProgress( `Screenshot captured (${ viewportType })` );
emitEvent( {
type: 'media.share',
timestamp: new Date().toISOString(),
mediaType: 'image',
mimeType: 'image/png',
dataBase64: base64,
caption: args.caption,
} );

return {
content: [
{
type: 'image' as const,
data: base64,
mimeType: 'image/png',
},
],
};
} finally {
await page.close();
}
return textResult(
`Screenshot delivered to the user (${ viewportType }${
args.fullPage ? ', full page' : ''
}). The user is viewing it now; do not describe what was sent.`
);
} catch ( error ) {
return errorResult(
`Screenshot failed: ${ error instanceof Error ? error.message : String( error ) }`
`Share screenshot failed: ${ error instanceof Error ? error.message : String( error ) }`
);
}
}
Expand Down Expand Up @@ -1169,6 +1253,7 @@ export const studioToolDefinitions = [
runWpCliTool,
validateBlocksTool,
takeScreenshotTool,
shareScreenshotTool,
installTaxonomyScriptsTool,
auditPerformanceTool,
auditSeoTool,
Expand Down Expand Up @@ -1217,6 +1302,6 @@ export function createRemoteSiteTools( token: string, siteId: number ) {
return createSdkMcpServer( {
name: 'studio',
version: '1.0.0',
tools: [ ...wpcomTools, takeScreenshotTool, createSiteTool, pullSiteTool ],
tools: [ ...wpcomTools, takeScreenshotTool, shareScreenshotTool, createSiteTool, pullSiteTool ],
} );
}
Loading
Loading