summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--cmd/lurchers/main.go11
-rw-r--r--data/configs/indeed.go4
-rw-r--r--evade/agent.go14
-rw-r--r--evade/proxy.go18
-rw-r--r--howlers/.python-version1
-rw-r--r--howlers/README.md0
-rw-r--r--howlers/pyproject.toml14
-rw-r--r--howlers/src/__init__.py0
-rw-r--r--howlers/src/hello.jac3
-rw-r--r--internal/db/connect.go (renamed from db/connect.go)21
-rw-r--r--internal/db/sql.go (renamed from db/sql.go)0
-rw-r--r--internal/handler/err.go (renamed from handler/err.go)0
-rw-r--r--internal/handler/img.go (renamed from handler/img.go)0
-rw-r--r--internal/lcommon.go304
-rw-r--r--internal/procs/chldspawn.go14
-rw-r--r--internal/url/addr.go32
-rw-r--r--urls/addr.go30
-rw-r--r--util/env.go12
-rw-r--r--util/logger.go99
20 files changed, 396 insertions, 184 deletions
diff --git a/.gitignore b/.gitignore
index ca8fb85..afc37ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,9 @@
*.so
*.dylib
+# JacLang things
+.jac/
+
# Test binary, built with `go test -c`
*.test
diff --git a/cmd/lurchers/main.go b/cmd/lurchers/main.go
index 85db16b..46a56ab 100644
--- a/cmd/lurchers/main.go
+++ b/cmd/lurchers/main.go
@@ -40,8 +40,19 @@ func main() {
// before making a request print "Visiting..."
i.C.OnRequest(func(r *colly.Request) {
+ r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+ r.Headers.Set("Accept-Language", "en-US,en;q=0.5")
+ r.Headers.Set("Connection", "keep-alive")
+ r.Headers.Set("Upgrade-Insecure-Requests", "1")
+
slog.Info("Going to website", slog.String("Request URL", r.URL.String()))
})
+
+ // on error
+ i.C.OnError(func(r *colly.Response, err error) {
+ slog.Error("This is not working fam", slog.Any("Request URL", r.Request.URL), slog.Any("Response", r), slog.Any("error", err))
+ })
+
// start scraping on website(s)
i.C.Visit(i.Data.Posting.Url)
i.C.Wait()
diff --git a/data/configs/indeed.go b/data/configs/indeed.go
index d1ba4e4..2eb2bbc 100644
--- a/data/configs/indeed.go
+++ b/data/configs/indeed.go
@@ -34,5 +34,9 @@ func IndeedConfig() *CollyCfg {
slog.Info("Job Found", slog.String("Position", jobPosition))
})
+ cfg.C.OnResponse(func(r *colly.Response) {
+ slog.Info(string(r.Body))
+ })
+
return cfg
}
diff --git a/evade/agent.go b/evade/agent.go
deleted file mode 100644
index fae8bcc..0000000
--- a/evade/agent.go
+++ /dev/null
@@ -1,14 +0,0 @@
-package evade
-
-import (
- "context"
-
- "github.com/gocolly/colly"
- "github.com/gocolly/colly/extensions"
-)
-
-func NewUserAgent(ctx context.Context, c *colly.Collector) {
- extensions.RandomUserAgent(c)
-}
-
-
diff --git a/evade/proxy.go b/evade/proxy.go
deleted file mode 100644
index a388960..0000000
--- a/evade/proxy.go
+++ /dev/null
@@ -1,18 +0,0 @@
-package evade
-
-import (
- "fmt"
-
- "github.com/gocolly/colly"
- "github.com/gocolly/colly/proxy"
-)
-
-func RotateProxy(c *colly.Collector, proxies *[]string) error {
- rp, err := proxy.RoundRobinProxySwitcher(*proxies...)
- if err != nil {
- return fmt.Errorf("error setting up proxy switcher: %s", err.Error())
- }
- c.SetProxyFunc(rp)
-
- return nil
-}
diff --git a/howlers/.python-version b/howlers/.python-version
new file mode 100644
index 0000000..e4fba21
--- /dev/null
+++ b/howlers/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/howlers/README.md b/howlers/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/howlers/README.md
diff --git a/howlers/pyproject.toml b/howlers/pyproject.toml
new file mode 100644
index 0000000..3e50d19
--- /dev/null
+++ b/howlers/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "howlers"
+version = "0.0.7"
+description = "Configuration for data collection jobs using JacLang."
+readme = "README.md"
+authors = [
+ { name = "Wayne-Cole", email = "77279425+Wacky404@users.noreply.github.com" }
+]
+requires-python = ">=3.12"
+dependencies = []
+
+[build-system]
+requires = ["uv_build>=0.9.16,<0.10.0"]
+build-backend = "uv_build"
diff --git a/howlers/src/__init__.py b/howlers/src/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/howlers/src/__init__.py
diff --git a/howlers/src/hello.jac b/howlers/src/hello.jac
new file mode 100644
index 0000000..2d18e52
--- /dev/null
+++ b/howlers/src/hello.jac
@@ -0,0 +1,3 @@
+# Program entry block: prints a greeting.
+with entry {
+ print("Hello, World!");
+}
diff --git a/db/connect.go b/internal/db/connect.go
index 2e7123d..2588e63 100644
--- a/db/connect.go
+++ b/internal/db/connect.go
@@ -7,8 +7,7 @@ import (
"log/slog"
"strconv"
- dbSql "github.com/Wacky404/lurchers/db"
- "github.com/Wacky404/lurchers/util"
+ "github.com/Wacky404/lurchers/internal"
)
type Database struct {
@@ -19,35 +18,35 @@ type Database struct {
user string
password string
name string
- sql *dbSql.DbStatements
+ sql *DbStatements
}
// creates an instance of the Database struct and loads in env vars
func LoadConfig(ctx *context.Context) (*Database, error) {
- port, err := strconv.ParseInt(util.GetVar("DB_PORT", ""), 10, 64)
+ port, err := strconv.ParseInt(internal.GetVar("DB_PORT", ""), 10, 64)
if err != nil {
slog.Error("error loading .env var port", slog.Any("error", err))
return nil, err
}
db := &Database{
ctx: ctx,
- host: util.GetVar("DB_HOST", ""),
+ host: internal.GetVar("DB_HOST", ""),
port: port,
- user: util.GetVar("DB_USER", ""),
- password: util.GetVar("DB_PASSWORD", ""),
- name: util.GetVar("DB_NAME", ""),
- sql: dbSql.NewDbStatements(),
+ user: internal.GetVar("DB_USER", ""),
+ password: internal.GetVar("DB_PASSWORD", ""),
+ name: internal.GetVar("DB_NAME", ""),
+ sql: NewDbStatements(),
}
return db, nil
}
// connecting to the precious
-func (d *Database) Connect() error {
+func (d *Database) Connect(service string) error {
psqlInfo := fmt.Sprintf("host=%s port=%d user=%s "+
"password=%s dbname=%s sslmode=disable",
d.host, d.port, d.user, d.password, d.name)
- conn, err := sql.Open("postgres", psqlInfo)
+ conn, err := sql.Open(service, psqlInfo)
if err != nil {
slog.Error("error connecting to the database", slog.Any("error", err))
}
diff --git a/db/sql.go b/internal/db/sql.go
index 4a9acfb..4a9acfb 100644
--- a/db/sql.go
+++ b/internal/db/sql.go
diff --git a/handler/err.go b/internal/handler/err.go
index 8160a2b..8160a2b 100644
--- a/handler/err.go
+++ b/internal/handler/err.go
diff --git a/handler/img.go b/internal/handler/img.go
index 858e52f..858e52f 100644
--- a/handler/img.go
+++ b/internal/handler/img.go
diff --git a/internal/lcommon.go b/internal/lcommon.go
new file mode 100644
index 0000000..d9c522f
--- /dev/null
+++ b/internal/lcommon.go
@@ -0,0 +1,304 @@
+/*
+ * [https://cs.opensource.google/go/x/exp/+/master:mmap/mmap_unix.go]
+ * for mmap implementation that I borrowed
+ */
+package internal
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "log/slog"
+ "os"
+ "runtime"
+ "syscall"
+)
+
+// GetVar returns the value of the environment variable named key. When the
+// variable is not set at all, fallback is returned instead; a variable that
+// is set to the empty string yields "".
+func GetVar(key string, fallback string) string {
+	if v, found := os.LookupEnv(key); found {
+		return v
+	}
+	return fallback
+}
+
+/*
+ * ======================================================
+ * Lurchers logger; two handlers: text to stdout and JSON
+ * to the log file
+ * ======================================================
+ */
+// MultiHandler is a slog.Handler that fans each log record out to a fixed
+// list of child handlers, letting every child apply its own level filter.
+type MultiHandler struct {
+	handlers []slog.Handler
+}
+
+// NewMultiHandler returns a MultiHandler that forwards to handlers in the
+// order given.
+func NewMultiHandler(handlers ...slog.Handler) *MultiHandler {
+	return &MultiHandler{handlers: handlers}
+}
+
+// Enabled reports whether at least one child handler accepts records at
+// level.
+func (m *MultiHandler) Enabled(ctx context.Context, level slog.Level) bool {
+	for _, h := range m.handlers {
+		if h.Enabled(ctx, level) {
+			return true
+		}
+	}
+	return false
+}
+
+// Handle forwards record to every child handler that is enabled for the
+// record's level. Delivery is best-effort: a failing child does not stop the
+// remaining children from receiving the record. The errors of all failing
+// children are joined and returned; the result is nil when none failed.
+func (m *MultiHandler) Handle(ctx context.Context, record slog.Record) error {
+	var errs []error
+	for _, h := range m.handlers {
+		// Enabled above is true as soon as ANY child accepts the level, so
+		// each child must be asked individually; otherwise a child with a
+		// stricter level would still emit the record.
+		if !h.Enabled(ctx, record.Level) {
+			continue
+		}
+		// Clone so that no two children share the record's attribute storage.
+		if err := h.Handle(ctx, record.Clone()); err != nil {
+			errs = append(errs, err)
+		}
+	}
+	return errors.Join(errs...)
+}
+
+// WithAttrs returns a new MultiHandler whose children each carry attrs.
+func (m *MultiHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
+	newHandlers := make([]slog.Handler, len(m.handlers))
+	for i, h := range m.handlers {
+		newHandlers[i] = h.WithAttrs(attrs)
+	}
+	return &MultiHandler{handlers: newHandlers}
+}
+
+// WithGroup returns a new MultiHandler whose children each open the group
+// name.
+func (m *MultiHandler) WithGroup(name string) slog.Handler {
+	newHandlers := make([]slog.Handler, len(m.handlers))
+	for i, h := range m.handlers {
+		newHandlers[i] = h.WithGroup(name)
+	}
+	return &MultiHandler{handlers: newHandlers}
+}
+
+// options holds the settings SetupLogger works from. SetupLogger fills in
+// defaults and then lets each functional option passed by the caller
+// overwrite them.
+type options struct {
+ logName string // path of the log file SetupLogger opens
+ stdoutLevel slog.Level // minimum level for the stdout text handler
+ fileLevel slog.Level // minimum level for the JSON file handler
+}
+
+// SetupLogger installs the process-wide default slog logger. Records go to
+// stdout as text and to a log file as JSON, each with its own minimum level.
+//
+// Defaults are "logs/Default.log", Info on stdout and Debug in the file; each
+// option in opts may override them (nil options are skipped). The log file is
+// opened in append mode and created if missing; its parent directory must
+// already exist.
+//
+// The opened file is returned so the caller can close it on shutdown. On
+// failure the returned file is nil and the error wraps the underlying
+// os.OpenFile error, so errors.Is / errors.As still work on it.
+func SetupLogger(opts ...func(*options)) (*os.File, error) {
+	o := options{
+		logName:     "logs/Default.log",
+		stdoutLevel: slog.LevelInfo,
+		fileLevel:   slog.LevelDebug,
+	}
+
+	for _, opt := range opts {
+		if opt != nil {
+			opt(&o)
+		}
+	}
+
+	logFile, err := os.OpenFile(o.logName, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o666)
+	if err != nil {
+		return nil, fmt.Errorf("no file opened for logs: %w", err)
+	}
+
+	stdoutHandler := slog.NewTextHandler(os.Stdout,
+		&slog.HandlerOptions{Level: o.stdoutLevel})
+	fileHandler := slog.NewJSONHandler(logFile,
+		&slog.HandlerOptions{Level: o.fileLevel})
+
+	slog.SetDefault(slog.New(NewMultiHandler(stdoutHandler, fileHandler)))
+
+	return logFile, nil
+}
+
+// WithLogName returns an option that sets the path of the log file used by
+// SetupLogger to name.
+func WithLogName(name string) func(*options) {
+	setName := func(cfg *options) { cfg.logName = name }
+	return setName
+}
+
+// WithStdoutLevel returns an option that sets the minimum level of the
+// stdout handler to level.
+func WithStdoutLevel(level slog.Level) func(*options) {
+	setLevel := func(cfg *options) { cfg.stdoutLevel = level }
+	return setLevel
+}
+
+// WithFileLevel returns an option that sets the minimum level of the log
+// file handler to level.
+func WithFileLevel(level slog.Level) func(*options) {
+	setLevel := func(cfg *options) { cfg.fileLevel = level }
+	return setLevel
+}
+
+/*
+ * =======================================================
+ * Lurchers Event logger; logs every event from child proc
+ * using the SPSC model
+ * =======================================================
+ */
+// Sizing constants for the event logger and its memory-mapped backing file.
+const (
+ MAX_EVENTS int = 256 // number of slots in WatchMen.EventQue
+ MAX_CHLD_PROC int = 10000 // presumably the maximum number of child processes — confirm
+ SIZE_RING_BUFF int = 16384 // presumably bytes reserved per child ring buffer — confirm
+ SIZE_FILE int = SIZE_RING_BUFF * MAX_CHLD_PROC // 163,840,000 bytes (~164 MB); size Open truncates an empty file to
+ debug bool = false // enables the println tracing in Open and ReaderAt.Close
+)
+
+// EventType identifies the kind of event recorded in a SysLurchEvent_t.
+type EventType int
+
+// Known event kinds. They are declared with the EventType type (not as
+// untyped integers) so that formatting them with fmt or slog goes through
+// String and prints the event name instead of a bare number.
+const (
+	CHLD_PROC_START EventType = iota
+	CHLD_PROC_DONE
+	CHLD_PROC_FAILED
+	CHLD_PROC_HURT
+	CHLD_PROC_HEALING
+	CHLD_PROC_HEALED
+)
+
+// eventName maps each known EventType to its log-friendly name.
+var eventName = map[EventType]string{
+	CHLD_PROC_START:   "child_start",
+	CHLD_PROC_DONE:    "child_done",
+	CHLD_PROC_FAILED:  "child_failed",
+	CHLD_PROC_HURT:    "child_hurt",
+	CHLD_PROC_HEALING: "child_healing",
+	CHLD_PROC_HEALED:  "child_healed",
+}
+
+// String returns the name of et, or "EventType(N)" for a value that has no
+// registered name, so an unexpected kind is still visible in log output.
+func (et EventType) String() string {
+	if name, ok := eventName[et]; ok {
+		return name
+	}
+	return fmt.Sprintf("EventType(%d)", int(et))
+}
+
+// SysLurchEvent_t is a single entry of the WatchMen event queue.
+type SysLurchEvent_t struct {
+ EventTime int // NOTE(review): unit and epoch are not visible here — confirm
+ EventKind EventType // which kind of event this entry records
+ EventID int
+ Data1, Data2 int // payload; meaning presumably depends on EventKind — confirm
+}
+
+// WatchMen is a fixed-capacity ring buffer of events. SysGetEvent reads from
+// eventTail and treats the queue as empty when eventHead equals eventTail.
+type WatchMen struct {
+ eventHead int // presumably the next slot the producer writes — confirm
+ eventTail int // next slot SysGetEvent reads
+ EventQue [MAX_EVENTS]SysLurchEvent_t
+}
+
+// SysGetEvent is the single-consumer side of the queue. It returns the
+// oldest unread event and advances the tail, wrapping at MAX_EVENTS, or
+// returns nil when the queue is empty. The returned pointer refers to the
+// slot inside EventQue itself, not to a copy.
+func (w *WatchMen) SysGetEvent() *SysLurchEvent_t {
+	tail := w.eventTail
+	if tail == w.eventHead {
+		return nil // buffer is empty
+	}
+
+	w.eventTail = (tail + 1) % MAX_EVENTS
+
+	return &w.EventQue[tail]
+}
+
+// ReaderAt exposes the bytes of a memory-mapped file (.mem) for reading.
+// As with any io.ReaderAt, ReadAt may be called from several goroutines in
+// parallel, but Close must not run concurrently with the reading methods.
+type ReaderAt struct {
+	data []byte
+}
+
+// ReadAt implements io.ReaderAt. It copies bytes starting at offset into p
+// and returns the number copied. It fails when the reader has been closed or
+// when offset is negative or past the end of the data, and reports io.EOF
+// when fewer than len(p) bytes were available.
+func (r *ReaderAt) ReadAt(p []byte, offset int64) (int, error) {
+	switch {
+	case r.data == nil:
+		return 0, errors.New("mmap: closed")
+	case offset < 0, offset > int64(len(r.data)):
+		return 0, fmt.Errorf("mmap: invalid ReadAt offset %d", offset)
+	}
+
+	copied := copy(p, r.data[offset:])
+	if copied == len(p) {
+		return copied, nil
+	}
+
+	return copied, io.EOF
+}
+
+func (r *ReaderAt) Close() error {
+ if r.data == nil {
+ return nil
+ } else if len(r.data) == 0 {
+ r.data = nil
+ return nil
+ }
+
+ data := r.data
+ r.data = nil
+ if debug {
+ var p *byte
+ if len(data) != 0 {
+ p = &data[0]
+ }
+ println("munmap", r, p)
+ }
+
+ runtime.SetFinalizer(r, nil)
+
+ return syscall.Munmap(data)
+}
+
+// Open memory-maps filename as a shared, readable and writable mapping and
+// returns a ReaderAt over it. The file holds the bytes that the parent
+// process and its child processes share.
+//
+// The file is created if it does not exist. An empty file (first run of the
+// program) is grown to SIZE_FILE bytes before it is mapped. An error is
+// returned when the file cannot be opened, inspected, resized or mapped, or
+// when its size does not fit in an int.
+//
+// A finalizer is installed on the returned ReaderAt so the mapping is
+// released if the caller forgets to Close it.
+func Open(filename string) (*ReaderAt, error) {
+	// The mapping below is PROT_WRITE with MAP_SHARED, and a new file has to
+	// be resized, so the descriptor must be opened read/write rather than
+	// read-only. O_CREATE covers the first run, when no file exists yet.
+	f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0o666)
+	if err != nil {
+		return nil, err
+	}
+	// The mapping stays valid after the descriptor is closed.
+	defer f.Close()
+
+	fs, err := f.Stat()
+	if err != nil {
+		return nil, err
+	}
+
+	// size of the memory file in question
+	size := fs.Size()
+	if size < 0 {
+		return nil, fmt.Errorf("mmap: file %q has negative size", filename)
+	}
+	if size == 0 {
+		// A brand-new file: grow it to the full shared-region size, and map
+		// that size rather than the stale zero length.
+		if err := f.Truncate(int64(SIZE_FILE)); err != nil {
+			return nil, fmt.Errorf("mmap: sizing new file %q: %w", filename, err)
+		}
+		size = int64(SIZE_FILE)
+	}
+	if size != int64(int(size)) {
+		return nil, fmt.Errorf("mmap: file %q is too large", filename)
+	}
+
+	data, err := syscall.Mmap(int(f.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
+	if err != nil {
+		return nil, err
+	}
+	r := &ReaderAt{data}
+	if debug {
+		var p *byte
+		if len(data) != 0 {
+			p = &data[0]
+		}
+		println("mmap", r, p)
+	}
+
+	runtime.SetFinalizer(r, (*ReaderAt).Close)
+
+	return r, nil
+}
+
+/*
+func isBufferFull(eventHead int, eventTail int) bool {
+ return ((eventHead + 1) % MAX_EVENTS) == eventTail
+}
+
+// single producer
+func SysQueueEvent(time int, kind EventType, id int, data1 int, data2 int) error {
+ if IsBufferFull() {
+ // overflow protection
+ fmt.Printf("warning: Event queue overflow! Event %s dropped\n", kind.String())
+ return fmt.Errorf("warning: Event queue overflow! Event %s dropped", kind.String())
+ }
+
+ EventQue[eventHead].EventTime = time
+ EventQue[eventHead].EventKind = kind
+ EventQue[eventHead].EventID = id
+ EventQue[eventHead].Data1 = data1
+ EventQue[eventHead].Data2 = data2
+
+ eventHead = (eventHead + 1) % MAX_EVENTS
+
+ return nil
+}
+*/
diff --git a/internal/procs/chldspawn.go b/internal/procs/chldspawn.go
new file mode 100644
index 0000000..eb7db69
--- /dev/null
+++ b/internal/procs/chldspawn.go
@@ -0,0 +1,14 @@
+package procs
+
+import (
+ "context"
+ "os"
+
+ "github.com/Wacky404/lurchers/internal/url"
+)
+
+// Proc groups a child process handle with the context and user parameters
+// kept alongside it.
+type Proc struct {
+ ID *os.Process // handle to the operating-system process
+ ctx context.Context
+ opts *url.UserParams // user-supplied parameters (websites, timeout, retry)
+}
diff --git a/internal/url/addr.go b/internal/url/addr.go
new file mode 100644
index 0000000..555a596
--- /dev/null
+++ b/internal/url/addr.go
@@ -0,0 +1,32 @@
+package url
+
+import (
+ "errors"
+ "strings"
+)
+
+// Websites_t is a list of website URLs.
+type Websites_t []string
+
+// UserParams holds the user-supplied settings.
+type UserParams struct {
+	Websites Websites_t // URLs added through NewWebsite
+	Timeout  int        // NOTE(review): unit is not visible here — confirm
+	Retry    bool
+}
+
+// NewWebsite appends w to the receiver's website list and returns a pointer
+// to that list. w must start with "http://" or "https://"; otherwise the list
+// is left unchanged and an error is returned together with the pointer to the
+// unchanged list.
+func (u *UserParams) NewWebsite(w string) (*Websites_t, error) {
+	// Reject only when NEITHER scheme matches. With "||" the check was true
+	// for every input, because no string has both prefixes at once.
+	if !strings.HasPrefix(w, "http://") && !strings.HasPrefix(w, "https://") {
+		return &u.Websites, errors.New("url entered is not an http or https url")
+	}
+
+	// append grows the slice; assigning to index len(u.Websites) is always
+	// out of range and panics.
+	u.Websites = append(u.Websites, w)
+
+	return &u.Websites, nil
+}
+
+// GetWebsites returns a pointer to the receiver's website list. It returns
+// nil and an error when the list is nil.
+func (u *UserParams) GetWebsites() (*Websites_t, error) {
+	if u.Websites != nil {
+		return &u.Websites, nil
+	}
+
+	return nil, errors.New("no websites given in user params")
+}
diff --git a/urls/addr.go b/urls/addr.go
deleted file mode 100644
index 6de7d3a..0000000
--- a/urls/addr.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package url
-
-import (
- "errors"
- "strings"
-)
-
-type Websites map[string]string
-
-type userParams struct {
- websites Websites
- timeout int
- retry bool
-}
-
-func NewWebsite(w string) (string, error) {
- if strings.HasPrefix(w, "http://") || strings.HasPrefix(w, "https://") {
- return w, nil
- }
-
- return w, errors.New("url entered is not an http or https url")
-}
-
-func (u *userParams) Getwebsites() (map[string]string, error) {
- if u.websites == nil {
- return nil, errors.New("no websites given in user params")
- }
-
- return u.websites, nil
-}
diff --git a/util/env.go b/util/env.go
deleted file mode 100644
index 8c59052..0000000
--- a/util/env.go
+++ /dev/null
@@ -1,12 +0,0 @@
-package util
-
-import "os"
-
-func GetVar(key string, fallback string) string {
- val, ok := os.LookupEnv(key)
- if !ok {
- return fallback
- }
-
- return val
-}
diff --git a/util/logger.go b/util/logger.go
deleted file mode 100644
index 94d0563..0000000
--- a/util/logger.go
+++ /dev/null
@@ -1,99 +0,0 @@
-package util
-
-import (
- "context"
- "fmt"
- "log/slog"
- "os"
-)
-
-type MultiHandler struct {
- handlers []slog.Handler
-}
-
-func NewMultiHandler(handlers ...slog.Handler) *MultiHandler {
- return &MultiHandler{handlers: handlers}
-}
-
-func (m *MultiHandler) Enabled(ctx context.Context, level slog.Level) bool {
- for _, h := range m.handlers {
- if h.Enabled(ctx, level) {
- return true
- }
- }
- return false
-}
-
-func (m *MultiHandler) Handle(ctx context.Context, record slog.Record) error {
- for _, h := range m.handlers {
- _ = h.Handle(ctx, record) // Process each handler, ignoring errors
- }
- return nil
-}
-
-func (m *MultiHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
- newHandlers := make([]slog.Handler, len(m.handlers))
- for i, h := range m.handlers {
- newHandlers[i] = h.WithAttrs(attrs)
- }
- return &MultiHandler{handlers: newHandlers}
-}
-
-func (m *MultiHandler) WithGroup(name string) slog.Handler {
- newHandlers := make([]slog.Handler, len(m.handlers))
- for i, h := range m.handlers {
- newHandlers[i] = h.WithGroup(name)
- }
- return &MultiHandler{handlers: newHandlers}
-}
-
-type options struct {
- logName string
- stdoutLevel slog.Level
- fileLevel slog.Level
-}
-
-func SetupLogger(opts ...func(*options)) (*os.File, error) {
- o := options{
- logName: "logs/Default.log",
- stdoutLevel: slog.LevelInfo,
- fileLevel: slog.LevelDebug,
- }
-
- for _, opt := range opts {
- opt(&o)
- }
-
- logFile, err := os.OpenFile(o.logName, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o666)
- if err != nil {
- return nil, fmt.Errorf("no file opened for logs: %v", err)
- }
-
- stdoutHandler := slog.NewTextHandler(os.Stdout,
- &slog.HandlerOptions{Level: o.stdoutLevel})
- fileHandler := slog.NewJSONHandler(logFile,
- &slog.HandlerOptions{Level: o.fileLevel})
-
- logger := slog.New(NewMultiHandler(stdoutHandler, fileHandler))
- slog.SetDefault(logger)
-
- return logFile, err
-}
-
-func WithLogName(name string) func(*options) {
- return func(o *options) {
- o.logName = name
- }
-}
-
-func WithStdoutLevel(level slog.Level) func(*options) {
- return func(o *options) {
- o.stdoutLevel = level
- }
-}
-
-func WithFileLevel(level slog.Level) func(*options) {
- return func(o *options) {
- o.fileLevel = level
- }
-}