From e8147659b719e5a1b5ad8dff3d0bfab15d08fe8e Mon Sep 17 00:00:00 2001 From: Wayne-Cole <77279425+Wacky404@users.noreply.github.com> Date: Sun, 26 Apr 2026 11:14:05 -0500 Subject: update+chore: updating files and switching remote --- cmd/lurchers/main.go | 64 ++++++++++++++++++------------------- howlers/src/hello.jac | 3 -- howlers/src/howler.jac | 73 ++++++++++++++++++++++++++++++++++++++++++ internal/lcommon.go | 11 ++++--- internal/procs/chldspawn.go | 33 +++++++++++++++++-- internal/url/addr.go | 77 +++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 213 insertions(+), 48 deletions(-) delete mode 100644 howlers/src/hello.jac create mode 100644 howlers/src/howler.jac diff --git a/cmd/lurchers/main.go b/cmd/lurchers/main.go index 46a56ab..8516b90 100644 --- a/cmd/lurchers/main.go +++ b/cmd/lurchers/main.go @@ -1,59 +1,55 @@ package main import ( - "context" "log" "log/slog" - "time" + "runtime" + "context" - "github.com/Wacky404/lurchers/data/configs" - "github.com/Wacky404/lurchers/evade" - "github.com/Wacky404/lurchers/util" - "github.com/gocolly/colly" + "github.com/Wacky404/lurchers/internal" "github.com/joho/godotenv" ) +// make sure I'm putting the mem file in the +// right place +// windows? priority 0 +var ( + FILE_MEM string + FILE_LOGS string +) + +func init() { + switch runtime.GOOS { + case "darwin": + FILE_MEM = "/var/run/lurchers.mem" + FILE_LOGS = "/tmp/lurchers_logs/lurchers.log" + case "linux": + FILE_MEM = "/run/lurchers.mem" + FILE_LOGS = "/tmp/lurchers_logs/lurchers.log" + } +} + func main() { - logFile, err := util.SetupLogger(util.WithLogName("logs/lurchers.log")) + logFile, err := internal.SetupLogger(internal.WithLogName(FILE_LOGS)) if err != nil { - log.Fatal("error setting up logger", err) + log.Fatal("setuplogger: error setting up logger", err) } defer logFile.Close() - // this time out value will change - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500) - defer cancel() - err = godotenv.Load() if err != nil { - slog.Error("error loading .env file", slog.Any("error", err)) + slog.Error("load: error loading .env file", slog.Any("error", err)) } - // our buffed collector for indeed - i := configs.IndeedConfig() - evade.NewUserAgent(ctx, i.C) - proxies := []string{util.GetVar("TOR", "socks5://127.0.0.1:9050")} - err = evade.RotateProxy(i.C, &proxies) + mem, err := internal.Open(FILE_MEM) if err != nil { - slog.Error("error configuring the RotateProxy", slog.Any("error", err)) + log.Fatal("open: error opening mem file", slog.Any("error", err)) } + defer mem.Close() + + ctx := context.TODO() - // before making a request print "Visiting..." - i.C.OnRequest(func(r *colly.Request) { - r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") - r.Headers.Set("Accept-Language", "en-US,en;q=0.5") - r.Headers.Set("Connection", "keep-alive") - r.Headers.Set("Upgrade-Insecure-Requests", "1") - slog.Info("Going to website", slog.String("Request URL", r.URL.String())) - }) - // on error - i.C.OnError(func(r *colly.Response, err error) { - slog.Error("This is not working fam", slog.Any("Request URL", r.Request.URL), slog.Any("Response", r), slog.Any("error", err)) - }) - // start scraping on website(s) - i.C.Visit(i.Data.Posting.Url) - i.C.Wait() } diff --git a/howlers/src/hello.jac b/howlers/src/hello.jac deleted file mode 100644 index 2d18e52..0000000 --- a/howlers/src/hello.jac +++ /dev/null @@ -1,3 +0,0 @@ -with entry { - print("Hello, World!"); -} diff --git a/howlers/src/howler.jac b/howlers/src/howler.jac new file mode 100644 index 0000000..5789e20 --- /dev/null +++ b/howlers/src/howler.jac @@ -0,0 +1,73 @@ +import sys; +import mmap; +import struct; +import from typing { List, Dict } +import from os { path } + +glob MAX_EVENTS: int = 256; +glob DIR_SCRIPTS: str = ""; + +enum EVENT { + CHLD_PROC_START, + CHLD_PROC_DONE, + CHLD_PROC_FAILED, + CHLD_PROC_HURT, + CHLD_PROC_HEALING, + CHLD_PROC_HEALED +} + +glob EventName: Dict[EVENT, str] = { + CHLD_PROC_START: "child_start", + CHLD_PROC_DONE: "child_done", + CHLD_PROC_FAILED: "child_failed", + CHLD_PROC_HURT: "child_hurt", + CHLD_PROC_HEALING: "child_healing", + CHLD_PROC_HEALED: "child_healed" +}; + +obj SysLurchEvent_t { + has eventTime: int; + has eventKind: EVENT; + has eventID: int; + has data1: int; + has data2: int; +} + +obj WatchMen { + static has eventHead: int; + static has eventTail: int; + static has EventQue: List[bytes(SysLurchEvent_t)]; + + static def isBufferFull() -> bool { + report ((eventHead + 1) % MAX_EVENTS) == eventTail; + } +} + +node Website { + has url: str; + has timemout: int; + has retry: int; + has script: str; + + has:priv _globals: Dict; + has:priv _locals: Dict; + + can run with Crawler entry { + exec(script, _globals, _locals); + # do stuff with the data + } +} + +walker Crawler { + has data: Dict[str, ...]; + + can crawl with Website entry { + self.data[visit.url] = visit.data; + } +} + +def fix_scrape_script(script: str) -> str by llm(); + +with entry { + print("Hello, World!"); +} diff --git a/internal/lcommon.go b/internal/lcommon.go index d9c522f..d6f53d0 100644 --- a/internal/lcommon.go +++ b/internal/lcommon.go @@ -122,7 +122,7 @@ func WithFileLevel(level slog.Level) func(*options) { /* * ======================================================= - * Lurchers Event logger; logs every event from child proc + * Lurchers Event Logger; logs every event from child proc * using the SPSC model * ======================================================= */ @@ -160,15 +160,16 @@ func (et EventType) String() string { type SysLurchEvent_t struct { EventTime int - EventKind EventType EventID int Data1, Data2 int + EventKind EventType } type WatchMen struct { - eventHead int - eventTail int - EventQue [MAX_EVENTS]SysLurchEvent_t + eventHead int + eventTail int + EventsTotal int + EventQue [MAX_EVENTS]SysLurchEvent_t } // single consumer diff --git a/internal/procs/chldspawn.go b/internal/procs/chldspawn.go index eb7db69..391581d 100644 --- a/internal/procs/chldspawn.go +++ b/internal/procs/chldspawn.go @@ -2,13 +2,40 @@ package procs import ( "context" - "os" + "fmt" + "os/exec" "github.com/Wacky404/lurchers/internal/url" ) +type Pid int + type Proc struct { - ID *os.Process - ctx context.Context + PID Pid + cmd *exec.Cmd opts *url.UserParams } + +func (p *Proc) NewChildProc(ctx context.Context, offset int) (Pid, error) { + // --offset is byte offset of ring buffer in mem file from mmap + // --options is packed json of user params for jac file to restrict job(s) to + // need to read docs on exec lib and spawning child procs in go + // howler --offset val1 --options val2 + (*p).cmd = exec.Command("howler", fmt.Sprintf("--offset %d --options %v", offset, (*p).opts)) + err := (*p).cmd.Start() + if err != nil { + return nil, err + } + (*p).PID = Pid((*p).cmd.Process.Pid) + return (*p).PID, nil +} + +func NewProc(prog string, flags []string) *Proc { + if !prog { + prog = "howler" + } + return &Proc{ + PID: nil, + cmd: exec.Command() + } +} diff --git a/internal/url/addr.go b/internal/url/addr.go index 555a596..e69d200 100644 --- a/internal/url/addr.go +++ b/internal/url/addr.go @@ -1,16 +1,63 @@ package url import ( + "encoding/json" "errors" + "fmt" "strings" + "time" ) type Websites_t []string type UserParams struct { - Websites Websites_t - Timeout int - Retry bool + Websites Websites_t `json:"websites"` + Timeout time.Duration `json:"timeout"` + Retry int8 `json:"retry"` +} + +type options struct { + websites Websites_t + timeout time.Duration + retry int8 +} + +func NewUserParams(opts ...func(*options)) *UserParams { + // will fail on default, but allows for just setting + // websites field without worrying about other opts + o := options{ + websites: nil, + timeout: 1 * time.Hour, + retry: 5, + } + + for _, opt := range opts { + opt(&o) + } + + return &UserParams{ + Websites: o.websites, + Timeout: o.timeout, + Retry: o.retry, + } +} + +func WithWebsites(w Websites_t) func(*options) { + return func(o *options) { + o.websites = w + } +} + +func WithTimeout(t time.Duration) func(*options) { + return func(o *options) { + o.timeout = t + } +} + +func WithRetry(r int8) func(*options) { + return func(o *options) { + o.retry = r + } } func (u *UserParams) NewWebsite(w string) (*Websites_t, error) { @@ -30,3 +77,27 @@ func (u *UserParams) GetWebsites() (*Websites_t, error) { return &(*u).Websites, nil } + +func (u *UserParams) Package() ([]byte, error) { + // turn struct values into json byte array for child proc + if u == nil { + return nil, errors.New("package: userparams is not initialized") + } else if (*u).Websites == nil { + return nil, errors.New("package: websites empty") + } + + packed, err := json.Marshal(u) + if err != nil { + return nil, err + } + + return packed, nil +} + +// TODO: deconstruct opts into a string for relay to chld proc cmd +func (u *UserParams) DeconstructOpts() (string, error) { + if (*u).Retry == 0 || (*u).Timeout == 0 || (*u).Websites == nil { + return "", errors.New("deconstructopts: one or more options are nil") + } + return fmt.Sprintf("websites=[],timeout=%v,retry=%d", (*u).Timeout, (*u).Retry), nil +} -- cgit v1.3-3-g829e