summary refs log tree commit diff
diff options
context:
space:
mode:
author Wayne-Cole <77279425+Wacky404@users.noreply.github.com> 2025-05-19 22:09:45 -0500
committer Wayne-Cole <77279425+Wacky404@users.noreply.github.com> 2025-05-19 22:09:45 -0500
commit a3151074722294f050e8d37fa973c55ed93ffdd2 (patch)
tree 5bbe06f3e8e05e30d0c132e3ddad51f47a09524a
parent 291b2680ce39f1bf8224520742c19935d85cdd54 (diff)
download lurchers-a3151074722294f050e8d37fa973c55ed93ffdd2.tar.xz
lurchers-a3151074722294f050e8d37fa973c55ed93ffdd2.zip
feat: testing first job
-rw-r--r-- Makefile 8
-rw-r--r-- cmd/lurchers/main.go 19
-rw-r--r-- data/configs/indeed.go 34
-rw-r--r-- data/jobs.go 38
-rw-r--r-- data/literature.go 66
5 files changed, 103 insertions, 62 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5867a0b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+BINARY=bin/lurchers
+SRC=cmd/lurchers/main.go
+
+run: $(BINARY)
+ ./$(BINARY)
+
+$(BINARY): $(SRC)
+ go build -o $(BINARY) $(SRC)
diff --git a/cmd/lurchers/main.go b/cmd/lurchers/main.go
index 2a127f8..85db16b 100644
--- a/cmd/lurchers/main.go
+++ b/cmd/lurchers/main.go
@@ -6,6 +6,8 @@ import (
"log/slog"
"time"
+ "github.com/Wacky404/lurchers/data/configs"
+ "github.com/Wacky404/lurchers/evade"
"github.com/Wacky404/lurchers/util"
"github.com/gocolly/colly"
"github.com/joho/godotenv"
@@ -27,11 +29,20 @@ func main() {
slog.Error("error loading .env file", slog.Any("error", err))
}
+ // our buffed collector for indeed
+ i := configs.IndeedConfig()
+ evade.NewUserAgent(ctx, i.C)
+ proxies := []string{util.GetVar("TOR", "socks5://127.0.0.1:9050")}
+ err = evade.RotateProxy(i.C, &proxies)
+ if err != nil {
+ slog.Error("error configuring the RotateProxy", slog.Any("error", err))
+ }
+
// before making a request print "Visiting..."
- c.OnRequest(func(r *colly.Request) {
- slog.Info("Visiting", slog.String("Request URL", r.URL.String()))
+ i.C.OnRequest(func(r *colly.Request) {
+ slog.Info("Going to website", slog.String("Request URL", r.URL.String()))
})
// start scraping on website(s)
- c.Visit("https://store.crunchyroll.com/collections/manga-books/?srule=Most-Popular")
- c.Wait()
+ i.C.Visit(i.Data.Posting.Url)
+ i.C.Wait()
}
diff --git a/data/configs/indeed.go b/data/configs/indeed.go
index 3fad0fd..d1ba4e4 100644
--- a/data/configs/indeed.go
+++ b/data/configs/indeed.go
@@ -1,20 +1,38 @@
package configs
import (
+ "log/slog"
+
"github.com/Wacky404/lurchers/data"
+
"github.com/gocolly/colly"
)
-func indeedConfig() *colly.Collector {
- job := new(data.Job)
- c := colly.NewCollector()
- c.OnHTML("a[id^='job_']", func(e *colly.HTMLElement) {
+type CollyCfg struct {
+ C *colly.Collector
+ Data *data.Job
+}
+
+func newCollyCfg() *CollyCfg {
+ return &CollyCfg{
+ C: colly.NewCollector(colly.Async(true)),
+ Data: data.NewJob(),
+ }
+}
+
+func IndeedConfig() *CollyCfg {
+ cfg := newCollyCfg()
+ cfg.Data.Posting.Website = "https://indeed.com/"
+ // testing this out; will need to build this
+ cfg.Data.Posting.Url = "https://www.indeed.com/jobs?q=%2B&l=Little+Rock%2C+AR&fromage=7&salaryType=%2440%2C000%2B&radius=5&jlid=68f779f7b0e38e09&rbl=Little+Rock%2C+AR&from=searchOnDesktopSerp&vjk=7a14f77130c03202"
+ cfg.C.OnHTML("a[id^='job_']", func(e *colly.HTMLElement) {
link := e.Attr("href")
- c.Visit(e.Request.AbsoluteURL(link))
+ cfg.C.Visit(e.Request.AbsoluteURL(link))
})
- c.OnHTML("h1[class^='jobserch-JobInfoHeader-title']", func(e *colly.HTMLElement) {
-
+ cfg.C.OnHTML("h1[class^='jobserch-JobInfoHeader-title']", func(e *colly.HTMLElement) {
+ jobPosition := e.Text
+ slog.Info("Job Found", slog.String("Position", jobPosition))
})
- return c
+ return cfg
}
diff --git a/data/jobs.go b/data/jobs.go
index c47d142..9984cbe 100644
--- a/data/jobs.go
+++ b/data/jobs.go
@@ -6,27 +6,31 @@ import (
)
type Job struct {
- posting JobPosting
- details JobDetails
+ Posting *JobPosting
+ Details *JobDetails
+}
+
+func NewJob() *Job {
+ return &Job{Posting: &JobPosting{}, Details: &JobDetails{}}
}
type JobPosting struct {
- website string
- url string
- location string
- company string
- position string
- jobType string
- workShift string
- workSetting string
- lastModified time.Time
+ Website string
+ Url string
+ Location string
+ Company string
+ Position string
+ JobType string
+ WorkShift string
+ WorkSetting string
+ LastModified time.Time
}
type JobDetails struct {
- skills json.Marshaler
- licenses json.Marshaler
- certs json.Marshaler
- education json.Marshaler
- benefits json.Marshaler
- fullJobDescription string
+ Skills json.Marshaler
+ Licenses json.Marshaler
+ Certs json.Marshaler
+ Education json.Marshaler
+ Benefits json.Marshaler
+ FullJobDescription string
}
diff --git a/data/literature.go b/data/literature.go
index f984bd0..ff6c4ec 100644
--- a/data/literature.go
+++ b/data/literature.go
@@ -6,49 +6,49 @@ import (
)
type Author struct {
- name string
- description string
- photo image.Image
+ Name string
+ Description string
+ Photo image.Image
}
type Category struct {
- name string
- subCategory SubCategory
+ Name string
+ SubCategory SubCategory
}
type SubCategory struct {
- name string
+ Name string
}
type Publisher struct {
- name string
- logo image.Image
+ Name string
+ Logo image.Image
}
type Book struct {
- title string
- author Author
- category Category
- series string
- description string
- cover image.Image
- publisher Publisher
- year_published uint16
- print_length uint16
- language string
- isbn_10 uint32
- isbn_13 uint32
- reviews string
- rating float32
- hcPriceUS map[string]uint16
- hcPriceCAN map[string]uint16
- pbPriceUS map[string]uint16
- pbPriceCAN map[string]uint16
- ebPriceUS map[string]uint16
- ebPriceCAN map[string]uint16
- audioPriceUS uint16
- audioPriceCAN uint16
- priceLastModified time.Time
- cheapestPrice map[string]uint16
- cheapPriceLastModified time.Time
+ Title string
+ Author Author
+ Category Category
+ Series string
+ Description string
+ Cover image.Image
+ Publisher Publisher
+ Year_published uint16
+ Print_length uint16
+ Language string
+ Isbn_10 uint32
+ Isbn_13 uint32
+ Reviews string
+ Rating float32
+ HcPriceUS map[string]uint16
+ HcPriceCAN map[string]uint16
+ PbPriceUS map[string]uint16
+ PbPriceCAN map[string]uint16
+ EbPriceUS map[string]uint16
+ EbPriceCAN map[string]uint16
+ AudioPriceUS uint16
+ AudioPriceCAN uint16
+ PriceLastModified time.Time
+ CheapestPrice map[string]uint16
+ CheapPriceLastModified time.Time
}