summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWayne-Cole <77279425+Wacky404@users.noreply.github.com>2026-04-26 11:14:05 -0500
committerWayne-Cole <77279425+Wacky404@users.noreply.github.com>2026-04-26 11:14:05 -0500
commite8147659b719e5a1b5ad8dff3d0bfab15d08fe8e (patch)
treec59730f3ee989075dd2a668af2af704d13fe01bb
parent46794376469f5460b1f3a6beac747e2f0ba140ff (diff)
downloadlurchers-e8147659b719e5a1b5ad8dff3d0bfab15d08fe8e.tar.xz
lurchers-e8147659b719e5a1b5ad8dff3d0bfab15d08fe8e.zip
update+chore: updating files and switching remote
-rw-r--r--cmd/lurchers/main.go64
-rw-r--r--howlers/src/hello.jac3
-rw-r--r--howlers/src/howler.jac73
-rw-r--r--internal/lcommon.go11
-rw-r--r--internal/procs/chldspawn.go33
-rw-r--r--internal/url/addr.go77
6 files changed, 213 insertions, 48 deletions
diff --git a/cmd/lurchers/main.go b/cmd/lurchers/main.go
index 46a56ab..8516b90 100644
--- a/cmd/lurchers/main.go
+++ b/cmd/lurchers/main.go
@@ -1,59 +1,55 @@
package main
import (
- "context"
"log"
"log/slog"
- "time"
+ "runtime"
+ "context"
- "github.com/Wacky404/lurchers/data/configs"
- "github.com/Wacky404/lurchers/evade"
- "github.com/Wacky404/lurchers/util"
- "github.com/gocolly/colly"
+ "github.com/Wacky404/lurchers/internal"
"github.com/joho/godotenv"
)
+// TODO: confirm these are the right locations for the mem file on each
+// platform. Windows is not handled yet (priority 0).
+var (
+	// FILE_MEM is the path of the mem file opened in main.
+	FILE_MEM string
+	// FILE_LOGS is the log file path handed to the logger setup in main.
+	FILE_LOGS string
+)
+
+// init picks FILE_MEM and FILE_LOGS from runtime.GOOS. Only "darwin" and
+// "linux" are recognised; on any other OS both paths stay empty.
+func init() {
+	const logPath = "/tmp/lurchers_logs/lurchers.log"
+	if runtime.GOOS == "darwin" {
+		FILE_MEM, FILE_LOGS = "/var/run/lurchers.mem", logPath
+	} else if runtime.GOOS == "linux" {
+		FILE_MEM, FILE_LOGS = "/run/lurchers.mem", logPath
+	}
+}
+
+// main sets up the file logger, loads the .env file and opens the mem file.
func main() {
- logFile, err := util.SetupLogger(util.WithLogName("logs/lurchers.log"))
+ logFile, err := internal.SetupLogger(internal.WithLogName(FILE_LOGS))
 if err != nil {
- log.Fatal("error setting up logger", err)
+ // Fatalf with an explicit separator; log.Fatal would glue err onto the message.
+ log.Fatalf("setuplogger: error setting up logger: %v", err)
 }
 defer logFile.Close()
- // this time out value will change
- ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500)
- defer cancel()
-
 err = godotenv.Load()
 if err != nil {
- slog.Error("error loading .env file", slog.Any("error", err))
+ slog.Error("load: error loading .env file", slog.Any("error", err))
 }
- // our buffed collector for indeed
- i := configs.IndeedConfig()
- evade.NewUserAgent(ctx, i.C)
- proxies := []string{util.GetVar("TOR", "socks5://127.0.0.1:9050")}
- err = evade.RotateProxy(i.C, &proxies)
+ mem, err := internal.Open(FILE_MEM)
 if err != nil {
- slog.Error("error configuring the RotateProxy", slog.Any("error", err))
+ // log.Fatalf takes format verbs, not slog attributes. It exits at once,
+ // so the deferred logFile.Close does not run on this path.
+ log.Fatalf("open: error opening mem file: %v", err)
 }
+ defer mem.Close()
+
+ // Placeholder context; blanked so the build does not fail on an unused variable.
+ ctx := context.TODO()
+ _ = ctx
- // before making a request print "Visiting..."
- i.C.OnRequest(func(r *colly.Request) {
- r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
- r.Headers.Set("Accept-Language", "en-US,en;q=0.5")
- r.Headers.Set("Connection", "keep-alive")
- r.Headers.Set("Upgrade-Insecure-Requests", "1")
- slog.Info("Going to website", slog.String("Request URL", r.URL.String()))
- })
- // on error
- i.C.OnError(func(r *colly.Response, err error) {
- slog.Error("This is not working fam", slog.Any("Request URL", r.Request.URL), slog.Any("Response", r), slog.Any("error", err))
- })
- // start scraping on website(s)
- i.C.Visit(i.Data.Posting.Url)
- i.C.Wait()
}
diff --git a/howlers/src/hello.jac b/howlers/src/hello.jac
deleted file mode 100644
index 2d18e52..0000000
--- a/howlers/src/hello.jac
+++ /dev/null
@@ -1,3 +0,0 @@
-with entry {
- print("Hello, World!");
-}
diff --git a/howlers/src/howler.jac b/howlers/src/howler.jac
new file mode 100644
index 0000000..5789e20
--- /dev/null
+++ b/howlers/src/howler.jac
@@ -0,0 +1,73 @@
+import sys;
+import mmap;
+import struct;
+import from typing { List, Dict }
+import from os { path }
+
+# Modulus used by WatchMen.isBufferFull when wrapping the head index.
+glob MAX_EVENTS: int = 256;
+# Empty by default; presumably the directory holding scrape scripts — confirm.
+glob DIR_SCRIPTS: str = "";
+
+# Event kinds for child processes; EventName gives each one a string name.
+enum EVENT {
+ CHLD_PROC_START,
+ CHLD_PROC_DONE,
+ CHLD_PROC_FAILED,
+ CHLD_PROC_HURT,
+ CHLD_PROC_HEALING,
+ CHLD_PROC_HEALED
+}
+
+# String name for every EVENT member.
+# NOTE(review): the keys are written without the `EVENT.` qualifier — confirm they resolve to the enum members.
+glob EventName: Dict[EVENT, str] = {
+ CHLD_PROC_START: "child_start",
+ CHLD_PROC_DONE: "child_done",
+ CHLD_PROC_FAILED: "child_failed",
+ CHLD_PROC_HURT: "child_hurt",
+ CHLD_PROC_HEALING: "child_healing",
+ CHLD_PROC_HEALED: "child_healed"
+};
+
+# One event record: a time, a kind, an id and two integer payload slots.
+# NOTE(review): eventKind is the second field here, while the Go struct of the
+# same name lists EventKind last — confirm whether the two layouts must match.
+obj SysLurchEvent_t {
+ has eventTime: int;
+ has eventKind: EVENT;
+ has eventID: int;
+ has data1: int;
+ has data2: int;
+}
+
+# Event queue state: head and tail indices plus the queued events.
+obj WatchMen {
+ static has eventHead: int;
+ static has eventTail: int;
+ # NOTE(review): `bytes(SysLurchEvent_t)` is used as an element type — confirm this is a valid annotation.
+ static has EventQue: List[bytes(SysLurchEvent_t)];
+
+ # Full when advancing eventHead by one slot (mod MAX_EVENTS) would land on eventTail.
+ # NOTE(review): uses `report` inside a `def` declared `-> bool` — confirm this should not be `return`.
+ static def isBufferFull() -> bool {
+ report ((eventHead + 1) % MAX_EVENTS) == eventTail;
+ }
+}
+
+# Node holding a url, timeout/retry settings and a script; when a Crawler
+# walker enters the node, the script is run through exec().
+node Website {
+ has url: str;
+ # NOTE(review): "timemout" looks like a typo for "timeout" — confirm before renaming.
+ has timemout: int;
+ has retry: int;
+ has script: str;
+
+ # Namespaces passed to exec() as its globals and locals.
+ has:priv _globals: Dict;
+ has:priv _locals: Dict;
+
+ can run with Crawler entry {
+ # NOTE(review): exec() runs arbitrary code — confirm `script` only ever comes from a trusted source.
+ exec(script, _globals, _locals);
+ # do stuff with the data
+ }
+}
+
+# Walker that stores a value per Website into `data`, keyed by url.
+walker Crawler {
+ has data: Dict[str, ...];
+
+ # NOTE(review): reads `visit.url` / `visit.data`, but Website declares no `data`
+ # field, and `visit` may not name the current node here — confirm.
+ can crawl with Website entry {
+ self.data[visit.url] = visit.data;
+ }
+}
+
+# Takes a scrape script and returns a script; no body is written here, the
+# implementation is delegated with `by llm()`.
+def fix_scrape_script(script: str) -> str by llm();
+
+# Module entry point; currently only prints a greeting.
+with entry {
+ print("Hello, World!");
+}
diff --git a/internal/lcommon.go b/internal/lcommon.go
index d9c522f..d6f53d0 100644
--- a/internal/lcommon.go
+++ b/internal/lcommon.go
@@ -122,7 +122,7 @@ func WithFileLevel(level slog.Level) func(*options) {
/*
* =======================================================
- * Lurchers Event logger; logs every event from child proc
+ * Lurchers Event Logger; logs every event from child proc
* using the SPSC model
* =======================================================
*/
@@ -160,15 +160,16 @@ func (et EventType) String() string {
+// SysLurchEvent_t is one logged event; it is the element type of
+// WatchMen.EventQue.
type SysLurchEvent_t struct {
 EventTime int
- EventKind EventType
 EventID int
 Data1, Data2 int
+ EventKind EventType
}
+// WatchMen holds the event queue: a fixed array of MAX_EVENTS entries, the
+// head and tail indices into it, and an EventsTotal counter.
type WatchMen struct {
- eventHead int
- eventTail int
- EventQue [MAX_EVENTS]SysLurchEvent_t
+ eventHead int
+ eventTail int
+ EventsTotal int
+ EventQue [MAX_EVENTS]SysLurchEvent_t
}
// single consumer
diff --git a/internal/procs/chldspawn.go b/internal/procs/chldspawn.go
index eb7db69..391581d 100644
--- a/internal/procs/chldspawn.go
+++ b/internal/procs/chldspawn.go
@@ -2,13 +2,40 @@ package procs
import (
"context"
- "os"
+ "fmt"
+ "os/exec"
"github.com/Wacky404/lurchers/internal/url"
)
+// Pid is the integer process id stored on a Proc.
+type Pid int
+
+// Proc ties a child command to the user params it runs with. PID is filled
+// in from the started command by NewChildProc.
type Proc struct {
- ID *os.Process
- ctx context.Context
+ PID Pid
+ cmd *exec.Cmd
 opts *url.UserParams
}
+
+// NewChildProc starts the "howler" child process for p and records its pid.
+//
+// The child is invoked as:
+//
+//	howler --offset <offset> --options <json>
+//
+// where offset is the byte offset of the ring buffer inside the mmap'd mem
+// file and <json> is p.opts packed by UserParams.Package, which restricts
+// the jobs the jac program may run. The command is bound to ctx, so
+// cancelling ctx kills the child.
+//
+// It returns the child's pid, or 0 and an error if the options cannot be
+// packed or the process cannot be started.
+func (p *Proc) NewChildProc(ctx context.Context, offset int) (Pid, error) {
+	packed, err := p.opts.Package()
+	if err != nil {
+		return 0, fmt.Errorf("newchildproc: %w", err)
+	}
+
+	// Each flag and each value is its own argv entry; exec does not split a
+	// single string on spaces the way a shell would.
+	p.cmd = exec.CommandContext(ctx, "howler",
+		"--offset", fmt.Sprint(offset),
+		"--options", string(packed),
+	)
+	if err := p.cmd.Start(); err != nil {
+		// Pid is an int, so the failure value is 0 rather than nil.
+		return 0, fmt.Errorf("newchildproc: starting howler: %w", err)
+	}
+
+	p.PID = Pid(p.cmd.Process.Pid)
+	return p.PID, nil
+}
+
+// NewProc builds a Proc whose command runs prog with flags as its
+// arguments. An empty prog defaults to "howler". PID is left at 0 and opts
+// at nil.
+func NewProc(prog string, flags []string) *Proc {
+	if prog == "" {
+		prog = "howler"
+	}
+	return &Proc{
+		cmd: exec.Command(prog, flags...),
+	}
+}
diff --git a/internal/url/addr.go b/internal/url/addr.go
index 555a596..e69d200 100644
--- a/internal/url/addr.go
+++ b/internal/url/addr.go
@@ -1,16 +1,63 @@
package url
import (
+ "encoding/json"
"errors"
+ "fmt"
"strings"
+ "time"
)
type Websites_t []string
+// UserParams is the set of user options that Package encodes as JSON.
+// Timeout is a time.Duration, which encoding/json writes as an integer
+// nanosecond count.
type UserParams struct {
- Websites Websites_t
- Timeout int
- Retry bool
+ Websites Websites_t `json:"websites"`
+ Timeout time.Duration `json:"timeout"`
+ Retry int8 `json:"retry"`
+}
+
+// options is the scratch value the With* option functions write into;
+// NewUserParams copies its fields into the returned UserParams.
+type options struct {
+ websites Websites_t
+ timeout time.Duration
+ retry int8
+}
+
+// NewUserParams builds a UserParams from the defaults (no websites, a
+// one-hour timeout, five retries) and then applies opts in order.
+//
+// Without a WithWebsites option the result has a nil Websites list, which
+// Package rejects; the defaults exist so callers can set only the website
+// list and ignore the other options.
+func NewUserParams(opts ...func(*options)) *UserParams {
+	cfg := &options{
+		timeout: time.Hour,
+		retry:   5,
+	}
+	for i := range opts {
+		opts[i](cfg)
+	}
+
+	params := new(UserParams)
+	params.Websites = cfg.websites
+	params.Timeout = cfg.timeout
+	params.Retry = cfg.retry
+	return params
+}
+
+// WithWebsites returns an option that sets the website list to w.
+func WithWebsites(w Websites_t) func(*options) {
+	set := func(dst *options) { dst.websites = w }
+	return set
+}
+
+// WithTimeout returns an option that sets the timeout to t.
+func WithTimeout(t time.Duration) func(*options) {
+	set := func(dst *options) { dst.timeout = t }
+	return set
+}
+
+// WithRetry returns an option that sets the retry count to r.
+func WithRetry(r int8) func(*options) {
+	set := func(dst *options) { dst.retry = r }
+	return set
}
func (u *UserParams) NewWebsite(w string) (*Websites_t, error) {
@@ -30,3 +77,27 @@ func (u *UserParams) GetWebsites() (*Websites_t, error) {
return &(*u).Websites, nil
}
+
+// Package encodes u as a JSON byte slice for hand-off to the child process.
+//
+// It returns an error when u is nil, when u has no websites, or when
+// json.Marshal fails.
+func (u *UserParams) Package() ([]byte, error) {
+	if u == nil {
+		return nil, errors.New("package: userparams is not initialized")
+	}
+	// len covers both a nil list and an allocated-but-empty one, matching
+	// the "websites empty" message.
+	if len(u.Websites) == 0 {
+		return nil, errors.New("package: websites empty")
+	}
+
+	packed, err := json.Marshal(u)
+	if err != nil {
+		return nil, err
+	}
+	return packed, nil
+}
+
+// DeconstructOpts flattens u into one "key=value" string for relay to the
+// child process command line, in the form
+//
+//	websites=[a,b],timeout=1h0m0s,retry=5
+//
+// It returns an error when u is nil or when any of Retry, Timeout or
+// Websites is unset (zero or empty).
+//
+// NOTE: websites are joined with "," and not escaped, so a URL that itself
+// contains a comma is ambiguous in the output.
+func (u *UserParams) DeconstructOpts() (string, error) {
+	// Guard the receiver first; the field reads below would panic on nil.
+	if u == nil {
+		return "", errors.New("deconstructopts: userparams is not initialized")
+	}
+	if u.Retry == 0 || u.Timeout == 0 || len(u.Websites) == 0 {
+		return "", errors.New("deconstructopts: one or more options are nil")
+	}
+	return fmt.Sprintf("websites=[%s],timeout=%v,retry=%d",
+		strings.Join(u.Websites, ","), u.Timeout, u.Retry), nil
+}