Skip to content

watercrawl/watercrawl-go

Repository files navigation

WaterCrawl Go SDK

This is the official Go SDK for the WaterCrawl API. It provides a simple and intuitive way to interact with the WaterCrawl web scraping service.

Installation

go get github.com/watercrawl/watercrawl-go

Usage

Initialize the client

import "github.com/watercrawl/watercrawl-go"

client := watercrawl.NewClient("your-api-key", "")  // Empty string uses default base URL

Create a crawl request

ctx := context.Background()
input := watercrawl.CreateCrawlRequestInput{
    URL: "https://example.com",
    Options: watercrawl.CrawlOptions{
        SpiderOptions: map[string]interface{}{
            "allowed_domains": []string{"example.com"},
        },
        PageOptions: map[string]interface{}{
            "wait_for": "#content",
        },
        PluginOptions: map[string]interface{}{
            "extract_links": true,
        },
    },
}

result, err := client.CreateCrawlRequest(ctx, input)
if err != nil {
    log.Fatal(err)
}

Monitor a crawl request

events, err := client.MonitorCrawlRequest(ctx, result.UUID, true)
if err != nil {
    log.Fatal(err)
}

for event := range events {
    switch event.Type {
    case "progress":
        fmt.Printf("Progress: %v\n", event.Data)
    case "result":
        fmt.Printf("Result: %v\n", event.Data)
    }
}

Quick URL scraping

pageOptions := map[string]interface{}{
    "wait_for": "#content",
}
pluginOptions := map[string]interface{}{
    "extract_links": true,
}

// Synchronous scraping with automatic download
result, err := client.ScrapeURL(ctx, "https://example.com", pageOptions, pluginOptions, true, true)
if err != nil {
    log.Fatal(err)
}
fmt.Printf("Scraped data: %v\n", result)

List crawl requests

list, err := client.GetCrawlRequests(ctx, 1, 10)  // Page 1, 10 items per page
if err != nil {
    log.Fatal(err)
}

for _, request := range list.Results {
    fmt.Printf("Request %s: %s\n", request.UUID, request.Status)
}

Get crawl request details

request, err := client.GetCrawlRequest(ctx, "request-uuid")
if err != nil {
    log.Fatal(err)
}

Stop a crawl request

err := client.StopCrawlRequest(ctx, "request-uuid")
if err != nil {
    log.Fatal(err)
}

Download crawl results

results, err := client.DownloadCrawlRequest(ctx, "request-uuid")
if err != nil {
    log.Fatal(err)
}

Get crawl request results

results, err := client.GetCrawlRequestResults(ctx, "request-uuid", 1, 10)
if err != nil {
    log.Fatal(err)
}

for _, result := range results.Results {
    fmt.Printf("Result for URL %s: %v\n", result.URL, result.Data)
}

Error Handling

The SDK follows standard Go error-handling conventions: every method that can fail returns an error as its last return value. Always check that error before using the other returned values.

Contributing

Please see CONTRIBUTING.md for details on how to contribute to this project.

License

This project is licensed under the MIT License; see the LICENSE file for details.

About

No description, website, or topics provided.

Resources

License

Contributing

Stars

Watchers

Forks

Packages

 
 
 

Contributors

Languages