Automating GitHub Projects V2 with Python: A Practical Guide

I. Prerequisites

GitHub Projects V2 is only accessible via the GraphQL API. The REST API does not support project field operations. You need a Personal Access Token (classic) with scopes: project, repo, read:org. Store it as a repository secret for GitHub Actions.

II. GraphQL Client

Every interaction with Projects V2 goes through a single GraphQL endpoint. Here is a reusable client with a request timeout and error handling for both HTTP-level and GraphQL-level failures:

python

import os, time, requests
from typing import Any

# Credentials and configuration come from environment variables; the GitHub
# Actions workflow in this guide exports MY_GH_TOKEN and MY_GH_PROJECT_ID.
GH_TOKEN = os.environ['MY_GH_TOKEN']
# Node ID of the target project. The later snippets refer to it as PROJECT_ID.
# Falls back to an empty string so importing this module does not fail when
# only the token is configured.
PROJECT_ID = os.environ.get('MY_GH_PROJECT_ID', '')
GH_GQL = 'https://api.github.com/graphql'
# Sent with every request, both GraphQL and REST.
HEADERS = {'Authorization': f'token {GH_TOKEN}', 'Content-Type': 'application/json'}

def gql(query: str, variables: dict | None = None) -> dict[str, Any]:
    """Run one GraphQL operation against GitHub and return its ``data`` payload.

    ``variables`` is attached to the request only when it is truthy.
    HTTP 4xx/5xx responses raise through ``raise_for_status``; a response
    whose JSON body contains an ``errors`` key raises ``RuntimeError``.
    """
    request_body: dict[str, Any] = (
        {'query': query, 'variables': variables} if variables else {'query': query}
    )
    # The 30-second timeout keeps a stalled connection from hanging the script.
    response = requests.post(GH_GQL, headers=HEADERS, json=request_body, timeout=30)
    response.raise_for_status()
    result = response.json()
    # GraphQL reports query-level failures inside a 200 OK body.
    if 'errors' not in result:
        return result['data']
    raise RuntimeError(f"GraphQL errors: {result['errors']}")

III. Discovering Project Structure

Before updating anything, you need the internal IDs for your project's fields and their options. This function returns a dictionary you can reuse throughout your script:

python

def get_fields(project_id: str) -> dict[str, dict]:
    """Return ``{field_name: {'id': str, 'options': {name: id}}}`` for a project.

    The ``'options'`` key is present only for fields whose node carries an
    ``options`` list (single-select fields). Only the first 50 fields are
    requested.

    Args:
        project_id: GraphQL node ID of the ProjectV2.

    Returns:
        A mapping from field name to its ID and, where applicable, a mapping
        from option name to option ID.
    """
    q = """
    query($pid: ID!) {
      node(id: $pid) { ... on ProjectV2 {
        fields(first: 50) { nodes {
          ... on ProjectV2SingleSelectField {
            id name options { id name }
          }
          ... on ProjectV2IterationField { id name }
          ... on ProjectV2Field { id name }
        }}
      }}
    }"""
    # Execute the query and extract the fields array
    nodes = gql(q, {'pid': project_id})['node']['fields']['nodes']
    out: dict[str, dict] = {}
    for f in nodes:
        # A field type matched by none of the fragments above is returned as
        # an empty object; skip it instead of failing on the missing 'id'.
        if not f or 'id' not in f:
            continue
        entry = {'id': f['id']}
        # Single-select fields have options; text/number/iteration fields don't
        if 'options' in f:
            entry['options'] = {o['name']: o['id'] for o in f['options']}
        out[f['name']] = entry
    return out

# Example: fetch the field map once, then look options up by field name
fields = get_fields('PVT_kwDO...')
status_options = fields['Status']['options']
print(status_options)
# {'Todo': 'abc123', 'In Progress': 'def456', 'Done': 'ghi789'}

IV. Listing Project Items (with Pagination)

Projects with more than 100 items require cursor-based pagination, because a single query can return at most 100 items. This generator yields items one page at a time:

python

from typing import Generator

def iter_items(
    project_id: str, page_size: int = 100
) -> Generator[list[dict], None, None]:
    """Walk a project's items page by page.

    Each yielded value is the ``nodes`` list of one page. The query asks for
    the item ID, the issue content (id, number, title) and up to 20
    single-select or text field values per item. After every page except the
    last, the generator sleeps for one second before requesting the next one.
    """
    items_query = """
    query($pid: ID!, $first: Int!, $after: String) {
      node(id: $pid) { ... on ProjectV2 {
        items(first: $first, after: $after) {
          pageInfo { hasNextPage endCursor }
          nodes {
            id
            content { ... on Issue { id number title } }
            fieldValues(first: 20) { nodes {
              ... on ProjectV2ItemFieldSingleSelectValue {
                field { ... on ProjectV2SingleSelectField { name } }
                optionId
              }
              ... on ProjectV2ItemFieldTextValue {
                field { ... on ProjectV2Field { name } }
                text
              }
            }}
          }
        }
      }}
    }"""
    after = None  # No cursor is sent for the first page
    while True:
        variables = {'pid': project_id, 'first': page_size}
        if after:
            variables['after'] = after
        page = gql(items_query, variables)['node']['items']
        yield page['nodes']
        page_info = page['pageInfo']
        if page_info['hasNextPage']:
            after = page_info['endCursor']
            time.sleep(1)  # Pause between pages to go easy on rate limits
        else:
            return

# Collect every item from every page into one flat list.
# PROJECT_ID is assumed to hold the project's node ID (for example the value
# the workflow exports as MY_GH_PROJECT_ID).
all_items = [entry for batch in iter_items(PROJECT_ID) for entry in batch]

V. Updating Fields

Two mutations cover most use cases: single-select fields (Status, Priority, Stage) and text fields (Assignee names, notes). Both require the project ID, item ID, and field ID.

python

def set_select(project_id: str, item_id: str,
               field_id: str, option_id: str) -> None:
    """Point a single-select field of a project item at one of its options.

    Sends the updateProjectV2ItemFieldValue mutation with
    ``singleSelectOptionId`` as the value; the response is discarded.
    """
    mutation = """
    mutation($p: ID!, $i: ID!, $f: ID!, $o: String!) {
      updateProjectV2ItemFieldValue(input: {
        projectId: $p, itemId: $i, fieldId: $f,
        value: {singleSelectOptionId: $o}
      }) { projectV2Item { id } }
    }"""
    variables = {'p': project_id, 'i': item_id, 'f': field_id, 'o': option_id}
    gql(mutation, variables)

def set_text(project_id: str, item_id: str,
            field_id: str, value: str) -> None:
    """Write a string into a text field of a project item.

    Sends the updateProjectV2ItemFieldValue mutation with ``text`` as the
    value; the response is discarded.
    """
    mutation = """
    mutation($p: ID!, $i: ID!, $f: ID!, $v: String!) {
      updateProjectV2ItemFieldValue(input: {
        projectId: $p, itemId: $i, fieldId: $f,
        value: {text: $v}
      }) { projectV2Item { id } }
    }"""
    variables = {'p': project_id, 'i': item_id, 'f': field_id, 'v': value}
    gql(mutation, variables)

# Example: move an issue to 'Done' status.
# `fields` is the mapping returned by get_fields(); `item_id` is assumed to be
# the project item ID of the issue being moved.
status = fields['Status']
set_select(
    project_id=PROJECT_ID,
    item_id=item_id,
    field_id=status['id'],
    option_id=status['options']['Done'],
)

VI. Adding Issues to a Project

To add an existing issue to a project, you need its GraphQL node ID (not the issue number). Two functions handle this:

python

def issue_node_id(repo: str, number: int) -> str:
    """Look up the GraphQL node ID of an issue.

    ``repo`` is split on '/' into exactly two parts, owner and repository
    name; ``number`` is the issue number within that repository.
    """
    repo_owner, repo_name = repo.split('/')
    lookup = """
    query($o: String!, $r: String!, $n: Int!) {
      repository(owner: $o, name: $r) {
        issue(number: $n) { id }
      }
    }"""
    result = gql(lookup, {'o': repo_owner, 'r': repo_name, 'n': number})
    issue = result['repository']['issue']
    return issue['id']

def add_to_project(project_id: str, content_id: str) -> str:
    """Attach existing content to a project and return the new item's ID.

    ``content_id`` is the GraphQL node ID of the content to add (for example
    the value returned by ``issue_node_id``). The returned item ID is what
    the field-update mutations expect as their item ID.
    """
    mutation = """
    mutation($p: ID!, $c: ID!) {
      addProjectV2ItemById(input: {
        projectId: $p, contentId: $c
      }) { item { id } }
    }"""
    result = gql(mutation, {'p': project_id, 'c': content_id})
    created_item = result['addProjectV2ItemById']['item']
    return created_item['id']

# Example: add issue #42 of myorg/myrepo to the project.
# First translate the issue number into its GraphQL node ID, then add it;
# the returned item ID can be used for subsequent field updates.
node_id = issue_node_id(repo='myorg/myrepo', number=42)
new_item_id = add_to_project(project_id=PROJECT_ID, content_id=node_id)

VII. Posting Comments (with Deduplication)

Issue comments use the REST API (simpler than GraphQL for this). Always check for duplicates before posting:

python

REST = 'https://api.github.com'

def post_comment(repo: str, issue_num: int, body: str) -> bool:
    """Post a comment unless an identical one already exists.

    Every page of existing comments is checked, so a duplicate is detected
    even on issues with more comments than fit in a single response.

    Args:
        repo: Repository in ``owner/name`` form.
        issue_num: Issue number within the repository.
        body: Exact comment text to post.

    Returns:
        True if the comment was posted, False if a comment with exactly the
        same body was found.

    Raises:
        requests.HTTPError: If listing or creating comments returns 4xx/5xx.
    """
    url = f'{REST}/repos/{repo}/issues/{issue_num}/comments'
    per_page = 100
    page = 1
    while True:
        resp = requests.get(url, headers=HEADERS,
                            params={'per_page': per_page, 'page': page},
                            timeout=30)
        # Fail loudly here: an error payload is a dict, not a list of comments
        resp.raise_for_status()
        comments = resp.json()
        if any(c['body'] == body for c in comments):
            return False  # Exact match found, skip posting
        # A short page means there is nothing further to fetch
        if len(comments) < per_page:
            break
        page += 1
    # Post the new comment
    r = requests.post(url, headers=HEADERS, json={'body': body}, timeout=30)
    r.raise_for_status()
    return True

# Notification bodies keyed by outcome. Each value is a str.format template
# with {id}, {stage} and {user} placeholders.
TEMPLATES = dict(
    approved=(
        'Submission {id} passed review for stage {stage}. '
        'Next stage is ready.\n\n@{user}'
    ),
    resubmit=(
        'Submission {id} needs revisions for stage {stage}. '
        'Please check feedback and resubmit.\n\n@{user}'
    ),
)

def notify(repo, issue_num, template, **kwargs):
    """Render a notification template and post it as an issue comment.

    ``template`` is a key of TEMPLATES; ``kwargs`` supply the placeholder
    values. A confirmation line is printed only when post_comment reports
    that the comment was actually posted.
    """
    message = TEMPLATES[template].format(**kwargs)
    if post_comment(repo, issue_num, message):
        print(f'Notified on #{issue_num}: {template}')

VIII. GitHub Actions Workflow

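Schedule the script with a cron trigger. Use workflow_dispatch for manual runs during development. Persist the event log as an artifact so it survives between runs. Note that actions/download-artifact@v4 only sees artifacts uploaded by the current run unless it is given the run-id of an earlier run together with a github-token: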

yaml

name: Sync Project
on:
  schedule:
    - cron: '*/15 * * * *'  # Run every 15 minutes
  workflow_dispatch:          # Allow manual trigger from Actions tab

# Never let two syncs overlap: both would read and rewrite the same event log.
concurrency:
  group: sync-project
  cancel-in-progress: false

permissions:
  contents: read   # Needed by actions/checkout
  actions: read    # Needed to list earlier runs and download their artifacts

jobs:
  sync:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # download-artifact@v4 only sees artifacts of the current run unless it
      # is given a run-id and a token, so look up the last successful run first.
      - name: Find previous successful run
        id: prev
        run: |
          run_id=$(gh run list --workflow "$GITHUB_WORKFLOW" --status success \
            --limit 1 --json databaseId --jq '.[0].databaseId // empty')
          echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
        env:
          GH_TOKEN: ${{ github.token }}
          GH_REPO: ${{ github.repository }}

      # Download the event log from the previous run (if there is one)
      - uses: actions/download-artifact@v4
        if: steps.prev.outputs.run_id != ''
        with:
          name: event-log
          path: .
          run-id: ${{ steps.prev.outputs.run_id }}
          github-token: ${{ github.token }}
        continue-on-error: true  # The artifact may be missing or expired

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - run: pip install requests

      # Run the sync script with secrets injected as env vars
      - run: python sync.py
        env:
          MY_GH_TOKEN: ${{ secrets.PROJECT_TOKEN }}
          MY_GH_PROJECT_ID: ${{ vars.PROJECT_ID }}

      # Save the event log for the next run
      - uses: actions/upload-artifact@v4
        with:
          name: event-log
          path: event_log.json
          overwrite: true  # Replace an artifact of the same name from this run

IX. Idempotent Event Processing

The script runs every 15 minutes. Without deduplication, it would re-process every event and post duplicate comments. A JSON event log solves this:

python

import json
from pathlib import Path
from datetime import datetime

# Location of the JSON event log (the workflow uploads this file as an artifact)
LOG = Path('event_log.json')

def load_log() -> dict:
    """Return the parsed contents of LOG, or an empty dict if the file is absent."""
    if not LOG.exists():
        return {}
    return json.loads(LOG.read_text())

def save_log(log: dict) -> None:
    """Serialize ``log`` as JSON indented by two spaces and write it to LOG."""
    serialized = json.dumps(log, indent=2)
    LOG.write_text(serialized)

def event_key(event_type: str, entity_id: str,
             timestamp: str) -> str:
    """Build the deduplication key for one event.

    The key is the event type, entity ID and timestamp joined by colons, so
    two events share a key only when all three parts match.
    """
    return '{}:{}:{}'.format(event_type, entity_id, timestamp)

def is_processed(key: str) -> bool:
    """Report whether ``key`` appears in the event log currently on disk."""
    recorded = load_log()
    return key in recorded

def mark_done(key: str) -> None:
    """Store ``key`` in the event log with the current time as its value.

    The log is reloaded from disk, updated, and written back in full.
    """
    entries = load_log()
    processed_at = datetime.now().isoformat()
    entries[key] = processed_at
    save_log(entries)

# Main processing loop: handle each record at most once.
# `new_records` is assumed to be an iterable of dicts with 'id' and
# 'timestamp' keys.
for record in new_records:
    key = event_key('submission', record['id'], record['timestamp'])
    if not is_processed(key):
        # ... process the record ...
        mark_done(key)  # Recorded so the next run skips this event

X. Summary

The complete pattern is: GitHub Action triggers on cron, script reads external data, matches to issues, checks the event log, updates project fields via GraphQL, posts deduplicated comments via REST, and persists the log as an artifact. Every function above is small and reusable, relying only on the shared module-level helpers and constants (gql, HEADERS, LOG, and so on). Adapt the field names, templates, and data sources to your workflow.